compiler/utils/Util.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The University of Glasgow 1992-2002
   4 %
   5
   6 \begin{code}
   7 -- | Highly random utility functions
   8 module Util (
   9         -- * Flags dependent on the compiler build
  10         ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib,
  11         isWindowsHost, isWindowsTarget, isDarwinTarget,
  12
  13         -- * General list processing
  14         zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
  15         zipLazy, stretchZipWith,
  16
  17         unzipWith,
  18
  19         mapFst, mapSnd,
  20         mapAndUnzip, mapAndUnzip3,
  21         nOfThem, filterOut, partitionWith, splitEithers,
  22
  23         foldl1', foldl2, count, all2,
  24
  25         lengthExceeds, lengthIs, lengthAtLeast,
  26         listLengthCmp, atLength, equalLength, compareLength,
  27
  28         isSingleton, only, singleton,
  29         notNull, snocView,
  30
  31         isIn, isn'tIn,
  32
  33         -- * Tuples
  34         fstOf3, sndOf3, thirdOf3,
  35
  36         -- * List operations controlled by another list
  37         takeList, dropList, splitAtList, split,
  38         dropTail,
  39
  40         -- * For loop
  41         nTimes,
  42
  43         -- * Sorting
  44         sortLe, sortWith, on,
  45
  46         -- * Comparisons
  47         isEqual, eqListBy,
  48         thenCmp, cmpList,
  49         removeSpaces,
  50
  51         -- * Edit distance
  52         fuzzyMatch, fuzzyLookup,
  53
  54         -- * Transitive closures
  55         transitiveClosure,
  56
  57         -- * Strictness
  58         seqList,
  59
  60         -- * Module names
  61         looksLikeModuleName,
  62
  63         -- * Argument processing
  64         getCmd, toCmdArgs, toArgs,
  65
  66         -- * Floating point
  67         readRational,
  68
  69         -- * read helpers
  70         maybeReadFuzzy,
  71
  72         -- * IO-ish utilities
  73         createDirectoryHierarchy,
  74         doesDirNameExist,
  75         modificationTimeIfExists,
  76
  77         global, consIORef, globalMVar, globalEmptyMVar,
  78
  79         -- * Filenames and paths
  80         Suffix,
  81         splitLongestPrefix,
  82         escapeSpaces,
  83         parseSearchPath,
  84         Direction(..), reslash,
  85
  86         -- * Utils for defining Data instances
  87         abstractConstr, abstractDataType, mkNoRepType,
  88
  89         -- * Utils for printing C code
  90         charToC
  91     ) where
  92
  93 #include "HsVersions.h"
  94
  95 import Exception
  96 import Panic
  97
  98 import Data.Data
  99 import Data.IORef       ( IORef, newIORef, atomicModifyIORef )
 100 import System.IO.Unsafe ( unsafePerformIO )
 101 import Data.List        hiding (group)
 102 import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar )
 103
 104 #ifdef DEBUG
 105 import FastTypes
 106 #endif
 107
 108 import Control.Monad    ( unless )
 109 import System.IO.Error as IO ( isDoesNotExistError )
 110 import System.Directory ( doesDirectoryExist, createDirectory,
 111                           getModificationTime )
 112 import System.FilePath
 113 import System.Time      ( ClockTime )
 114
 115 import Data.Char        ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
 116 import Data.Ratio       ( (%) )
 117 import Data.Ord         ( comparing )
 118 import Data.Bits
 119 import Data.Word
 120 import qualified Data.IntMap as IM
 121
 122 infixr 9 `thenCmp`
 123 \end{code}
 124
 125 %************************************************************************
 126 %*                                                                      *
 127 \subsection{Is DEBUG on, are we on Windows, etc?}
 128 %*                                                                      *
 129 %************************************************************************
 130
 131 These booleans are global constants, set by CPP flags.  They allow us to
 132 recompile a single module (this one) to change whether or not debug output
 133 appears. They sometimes let us avoid even running CPP elsewhere.
 134
 135 It's important that the flags are literal constants (True/False). Then,
 136 with -0, tests of the flags in other modules will simplify to the correct
 137 branch of the conditional, thereby dropping debug code altogether when
 138 the flags are off.
 139
 140 \begin{code}
 141 ghciSupported :: Bool
 142 #ifdef GHCI
 143 ghciSupported = True
 144 #else
 145 ghciSupported = False
 146 #endif
 147
 148 debugIsOn :: Bool
 149 #ifdef DEBUG
 150 debugIsOn = True
 151 #else
 152 debugIsOn = False
 153 #endif
 154
 155 ghciTablesNextToCode :: Bool
 156 #ifdef GHCI_TABLES_NEXT_TO_CODE
 157 ghciTablesNextToCode = True
 158 #else
 159 ghciTablesNextToCode = False
 160 #endif
 161
 162 isDynamicGhcLib :: Bool
 163 #ifdef DYNAMIC
 164 isDynamicGhcLib = True
 165 #else
 166 isDynamicGhcLib = False
 167 #endif
 168
 169 isWindowsHost :: Bool
 170 #ifdef mingw32_HOST_OS
 171 isWindowsHost = True
 172 #else
 173 isWindowsHost = False
 174 #endif
 175
 176 isWindowsTarget :: Bool
 177 #ifdef mingw32_TARGET_OS
 178 isWindowsTarget = True
 179 #else
 180 isWindowsTarget = False
 181 #endif
 182
 183 isDarwinTarget :: Bool
 184 #ifdef darwin_TARGET_OS
 185 isDarwinTarget = True
 186 #else
 187 isDarwinTarget = False
 188 #endif
 189 \end{code}
 190
 191 %************************************************************************
 192 %*                                                                      *
 193 \subsection{A for loop}
 194 %*                                                                      *
 195 %************************************************************************
 196
 197 \begin{code}
 198 -- | Compose a function with itself n times.  (nth rather than twice)
 199 nTimes :: Int -> (a -> a) -> (a -> a)
 200 nTimes 0 _ = id
 201 nTimes 1 f = f
 202 nTimes n f = f . nTimes (n-1) f
 203 \end{code}
 204
 205 \begin{code}
 206 fstOf3   :: (a,b,c) -> a
 207 sndOf3   :: (a,b,c) -> b
 208 thirdOf3 :: (a,b,c) -> c
 209 fstOf3      (a,_,_) =  a
 210 sndOf3      (_,b,_) =  b
 211 thirdOf3    (_,_,c) =  c
 212 \end{code}
 213
 214 %************************************************************************
 215 %*                                                                      *
 216 \subsection[Utils-lists]{General list processing}
 217 %*                                                                      *
 218 %************************************************************************
 219
 220 \begin{code}
 221 filterOut :: (a->Bool) -> [a] -> [a]
 222 -- ^ Like filter, only it reverses the sense of the test
 223 filterOut _ [] = []
 224 filterOut p (x:xs) | p x       = filterOut p xs
 225                    | otherwise = x : filterOut p xs
 226
 227 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
 228 -- ^ Uses a function to determine which of two output lists an input element should join
 229 partitionWith _ [] = ([],[])
 230 partitionWith f (x:xs) = case f x of
 231                          Left  b -> (b:bs, cs)
 232                          Right c -> (bs, c:cs)
 233     where (bs,cs) = partitionWith f xs
 234
 235 splitEithers :: [Either a b] -> ([a], [b])
 236 -- ^ Teases a list of 'Either's apart into two lists
 237 splitEithers [] = ([],[])
 238 splitEithers (e : es) = case e of
 239                         Left x -> (x:xs, ys)
 240                         Right y -> (xs, y:ys)
 241     where (xs,ys) = splitEithers es
 242 \end{code}
 243
 244 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
 245 are of equal length.  Alastair Reid thinks this should only happen if
 246 DEBUGging on; hey, why not?
 247
 248 \begin{code}
 249 zipEqual        :: String -> [a] -> [b] -> [(a,b)]
 250 zipWithEqual    :: String -> (a->b->c) -> [a]->[b]->[c]
 251 zipWith3Equal   :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
 252 zipWith4Equal   :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
 253
 254 #ifndef DEBUG
 255 zipEqual      _ = zip
 256 zipWithEqual  _ = zipWith
 257 zipWith3Equal _ = zipWith3
 258 zipWith4Equal _ = zipWith4
 259 #else
 260 zipEqual _   []     []     = []
 261 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
 262 zipEqual msg _      _      = panic ("zipEqual: unequal lists:"++msg)
 263
 264 zipWithEqual msg z (a:as) (b:bs)=  z a b : zipWithEqual msg z as bs
 265 zipWithEqual _   _ [] []        =  []
 266 zipWithEqual msg _ _ _          =  panic ("zipWithEqual: unequal lists:"++msg)
 267
 268 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
 269                                 =  z a b c : zipWith3Equal msg z as bs cs
 270 zipWith3Equal _   _ [] []  []   =  []
 271 zipWith3Equal msg _ _  _   _    =  panic ("zipWith3Equal: unequal lists:"++msg)
 272
 273 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
 274                                 =  z a b c d : zipWith4Equal msg z as bs cs ds
 275 zipWith4Equal _   _ [] [] [] [] =  []
 276 zipWith4Equal msg _ _  _  _  _  =  panic ("zipWith4Equal: unequal lists:"++msg)
 277 #endif
 278 \end{code}
 279
 280 \begin{code}
 281 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
 282 zipLazy :: [a] -> [b] -> [(a,b)]
 283 zipLazy []     _       = []
 284 -- We want to write this, but with GHC 6.4 we get a warning, so it
 285 -- doesn't validate:
 286 -- zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
 287 -- so we write this instead:
 288 zipLazy (x:xs) zs = let y : ys = zs
 289                     in (x,y) : zipLazy xs ys
 290 \end{code}
 291
 292
 293 \begin{code}
 294 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
 295 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
 296 -- the places where @p@ returns @True@
 297
 298 stretchZipWith _ _ _ []     _ = []
 299 stretchZipWith p z f (x:xs) ys
 300   | p x       = f x z : stretchZipWith p z f xs ys
 301   | otherwise = case ys of
 302                 []     -> []
 303                 (y:ys) -> f x y : stretchZipWith p z f xs ys
 304 \end{code}
 305
 306
 307 \begin{code}
 308 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
 309 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
 310
 311 mapFst f xys = [(f x, y) | (x,y) <- xys]
 312 mapSnd f xys = [(x, f y) | (x,y) <- xys]
 313
 314 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
 315
 316 mapAndUnzip _ [] = ([], [])
 317 mapAndUnzip f (x:xs)
 318   = let (r1,  r2)  = f x
 319         (rs1, rs2) = mapAndUnzip f xs
 320     in
 321     (r1:rs1, r2:rs2)
 322
 323 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
 324
 325 mapAndUnzip3 _ [] = ([], [], [])
 326 mapAndUnzip3 f (x:xs)
 327   = let (r1,  r2,  r3)  = f x
 328         (rs1, rs2, rs3) = mapAndUnzip3 f xs
 329     in
 330     (r1:rs1, r2:rs2, r3:rs3)
 331 \end{code}
 332
 333 \begin{code}
 334 nOfThem :: Int -> a -> [a]
 335 nOfThem n thing = replicate n thing
 336
 337 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
 338 --
 339 -- @
 340 --  atLength atLenPred atEndPred ls n
 341 --   | n < 0         = atLenPred n
 342 --   | length ls < n = atEndPred (n - length ls)
 343 --   | otherwise     = atLenPred (drop n ls)
 344 -- @
 345 atLength :: ([a] -> b)
 346          -> (Int -> b)
 347          -> [a]
 348          -> Int
 349          -> b
 350 atLength atLenPred atEndPred ls n
 351   | n < 0     = atEndPred n
 352   | otherwise = go n ls
 353   where
 354     go n [] = atEndPred n
 355     go 0 ls = atLenPred ls
 356     go n (_:xs) = go (n-1) xs
 357
 358 -- Some special cases of atLength:
 359
 360 lengthExceeds :: [a] -> Int -> Bool
 361 -- ^ > (lengthExceeds xs n) = (length xs > n)
 362 lengthExceeds = atLength notNull (const False)
 363
 364 lengthAtLeast :: [a] -> Int -> Bool
 365 lengthAtLeast = atLength notNull (== 0)
 366
 367 lengthIs :: [a] -> Int -> Bool
 368 lengthIs = atLength null (==0)
 369
 370 listLengthCmp :: [a] -> Int -> Ordering
 371 listLengthCmp = atLength atLen atEnd
 372  where
 373   atEnd 0      = EQ
 374   atEnd x
 375    | x > 0     = LT -- not yet seen 'n' elts, so list length is < n.
 376    | otherwise = GT
 377
 378   atLen []     = EQ
 379   atLen _      = GT
 380
 381 equalLength :: [a] -> [b] -> Bool
 382 equalLength []     []     = True
 383 equalLength (_:xs) (_:ys) = equalLength xs ys
 384 equalLength _      _      = False
 385
 386 compareLength :: [a] -> [b] -> Ordering
 387 compareLength []     []     = EQ
 388 compareLength (_:xs) (_:ys) = compareLength xs ys
 389 compareLength []     _      = LT
 390 compareLength _      []     = GT
 391
 392 ----------------------------
 393 singleton :: a -> [a]
 394 singleton x = [x]
 395
 396 isSingleton :: [a] -> Bool
 397 isSingleton [_] = True
 398 isSingleton _   = False
 399
 400 notNull :: [a] -> Bool
 401 notNull [] = False
 402 notNull _  = True
 403
 404 only :: [a] -> a
 405 #ifdef DEBUG
 406 only [a] = a
 407 #else
 408 only (a:_) = a
 409 #endif
 410 only _ = panic "Util: only"
 411 \end{code}
 412
 413 Debugging/specialising versions of \tr{elem} and \tr{notElem}
 414
 415 \begin{code}
 416 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
 417
 418 # ifndef DEBUG
 419 isIn    _msg x ys = x `elem` ys
 420 isn'tIn _msg x ys = x `notElem` ys
 421
 422 # else /* DEBUG */
 423 isIn msg x ys
 424   = elem100 (_ILIT(0)) x ys
 425   where
 426     elem100 _ _ []        = False
 427     elem100 i x (y:ys)
 428       | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
 429                                 (x `elem` (y:ys))
 430       | otherwise       = x == y || elem100 (i +# _ILIT(1)) x ys
 431
 432 isn'tIn msg x ys
 433   = notElem100 (_ILIT(0)) x ys
 434   where
 435     notElem100 _ _ [] =  True
 436     notElem100 i x (y:ys)
 437       | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
 438                                 (x `notElem` (y:ys))
 439       | otherwise      =  x /= y && notElem100 (i +# _ILIT(1)) x ys
 440 # endif /* DEBUG */
 441 \end{code}
 442
 443 %************************************************************************
 444 %*                                                                      *
 445 \subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten}
 446 %*                                                                      *
 447 %************************************************************************
 448
 449 \begin{display}
 450 Date: Mon, 3 May 93 20:45:23 +0200
 451 From: Carsten Kehler Holst <kehler@cs.chalmers.se>
 452 To: partain@dcs.gla.ac.uk
 453 Subject: natural merge sort beats quick sort [ and it is prettier ]
 454
 455 Here is a piece of Haskell code that I'm rather fond of. See it as an
 456 attempt to get rid of the ridiculous quick-sort routine. group is
 457 quite useful by itself I think it was John's idea originally though I
 458 believe the lazy version is due to me [surprisingly complicated].
 459 gamma [used to be called] is called gamma because I got inspired by
 460 the Gamma calculus. It is not very close to the calculus but does
 461 behave less sequentially than both foldr and foldl. One could imagine
 462 a version of gamma that took a unit element as well thereby avoiding
 463 the problem with empty lists.
 464
 465 I've tried this code against
 466
 467    1) insertion sort - as provided by haskell
 468    2) the normal implementation of quick sort
 469    3) a deforested version of quick sort due to Jan Sparud
 470    4) a super-optimized-quick-sort of Lennart's
 471
 472 If the list is partially sorted both merge sort and in particular
 473 natural merge sort wins. If the list is random [ average length of
 474 rising subsequences = approx 2 ] mergesort still wins and natural
 475 merge sort is marginally beaten by Lennart's soqs. The space
 476 consumption of merge sort is a bit worse than Lennart's quick sort
 477 approx a factor of 2. And a lot worse if Sparud's bug-fix [see his
 478 fpca article ] isn't used because of group.
 479
 480 have fun
 481 Carsten
 482 \end{display}
 483
 484 \begin{code}
 485 group :: (a -> a -> Bool) -> [a] -> [[a]]
 486 -- Given a <= function, group finds maximal contiguous up-runs
 487 -- or down-runs in the input list.
 488 -- It's stable, in the sense that it never re-orders equal elements
 489 --
 490 -- Date: Mon, 12 Feb 1996 15:09:41 +0000
 491 -- From: Andy Gill <andy@dcs.gla.ac.uk>
 492 -- Here is a `better' definition of group.
 493
 494 group _ []     = []
 495 group p (x:xs) = group' xs x x (x :)
 496   where
 497     group' []     _     _     s  = [s []]
 498     group' (x:xs) x_min x_max s
 499         |      x_max `p` x  = group' xs x_min x     (s . (x :))
 500         | not (x_min `p` x) = group' xs x     x_max ((x :) . s)
 501         | otherwise         = s [] : group' xs x x (x :)
 502         -- NB: the 'not' is essential for stablity
 503         --     x `p` x_min would reverse equal elements
 504
 505 generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a]
 506 generalMerge _ xs [] = xs
 507 generalMerge _ [] ys = ys
 508 generalMerge p (x:xs) (y:ys) | x `p` y   = x : generalMerge p xs     (y:ys)
 509                              | otherwise = y : generalMerge p (x:xs) ys
 510
 511 -- gamma is now called balancedFold
 512
 513 balancedFold :: (a -> a -> a) -> [a] -> a
 514 balancedFold _ [] = error "can't reduce an empty list using balancedFold"
 515 balancedFold _ [x] = x
 516 balancedFold f l  = balancedFold f (balancedFold' f l)
 517
 518 balancedFold' :: (a -> a -> a) -> [a] -> [a]
 519 balancedFold' f (x:y:xs) = f x y : balancedFold' f xs
 520 balancedFold' _ xs = xs
 521
 522 generalNaturalMergeSort :: (a -> a -> Bool) -> [a] -> [a]
 523 generalNaturalMergeSort _ [] = []
 524 generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs
 525
 526 #if NOT_USED
 527 generalMergeSort p [] = []
 528 generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs
 529
 530 mergeSort, naturalMergeSort :: Ord a => [a] -> [a]
 531
 532 mergeSort = generalMergeSort (<=)
 533 naturalMergeSort = generalNaturalMergeSort (<=)
 534
 535 mergeSortLe le = generalMergeSort le
 536 #endif
 537
 538 sortLe :: (a->a->Bool) -> [a] -> [a]
 539 sortLe le = generalNaturalMergeSort le
 540
 541 sortWith :: Ord b => (a->b) -> [a] -> [a]
 542 sortWith get_key xs = sortLe le xs
 543   where
 544     x `le` y = get_key x < get_key y
 545
 546 on :: (a -> a -> c) -> (b -> a) -> b -> b -> c
 547 on cmp sel = \x y -> sel x `cmp` sel y
 548
 549 \end{code}
 550
 551 %************************************************************************
 552 %*                                                                      *
 553 \subsection[Utils-transitive-closure]{Transitive closure}
 554 %*                                                                      *
 555 %************************************************************************
 556
 557 This algorithm for transitive closure is straightforward, albeit quadratic.
 558
 559 \begin{code}
 560 transitiveClosure :: (a -> [a])         -- Successor function
 561                   -> (a -> a -> Bool)   -- Equality predicate
 562                   -> [a]
 563                   -> [a]                -- The transitive closure
 564
 565 transitiveClosure succ eq xs
 566  = go [] xs
 567  where
 568    go done []                      = done
 569    go done (x:xs) | x `is_in` done = go done xs
 570                   | otherwise      = go (x:done) (succ x ++ xs)
 571
 572    _ `is_in` []                 = False
 573    x `is_in` (y:ys) | eq x y    = True
 574                     | otherwise = x `is_in` ys
 575 \end{code}
 576
 577 %************************************************************************
 578 %*                                                                      *
 579 \subsection[Utils-accum]{Accumulating}
 580 %*                                                                      *
 581 %************************************************************************
 582
 583 A combination of foldl with zip.  It works with equal length lists.
 584
 585 \begin{code}
 586 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
 587 foldl2 _ z [] [] = z
 588 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
 589 foldl2 _ _ _      _      = panic "Util: foldl2"
 590
 591 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
 592 -- True if the lists are the same length, and
 593 -- all corresponding elements satisfy the predicate
 594 all2 _ []     []     = True
 595 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
 596 all2 _ _      _      = False
 597 \end{code}
 598
 599 Count the number of times a predicate is true
 600
 601 \begin{code}
 602 count :: (a -> Bool) -> [a] -> Int
 603 count _ [] = 0
 604 count p (x:xs) | p x       = 1 + count p xs
 605                | otherwise = count p xs
 606 \end{code}
 607
 608 @splitAt@, @take@, and @drop@ but with length of another
 609 list giving the break-off point:
 610
 611 \begin{code}
 612 takeList :: [b] -> [a] -> [a]
 613 takeList [] _ = []
 614 takeList (_:xs) ls =
 615    case ls of
 616      [] -> []
 617      (y:ys) -> y : takeList xs ys
 618
 619 dropList :: [b] -> [a] -> [a]
 620 dropList [] xs    = xs
 621 dropList _  xs@[] = xs
 622 dropList (_:xs) (_:ys) = dropList xs ys
 623
 624
 625 splitAtList :: [b] -> [a] -> ([a], [a])
 626 splitAtList [] xs     = ([], xs)
 627 splitAtList _ xs@[]   = (xs, xs)
 628 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
 629     where
 630       (ys', ys'') = splitAtList xs ys
 631
 632 -- drop from the end of a list
 633 dropTail :: Int -> [a] -> [a]
 634 dropTail n = reverse . drop n . reverse
 635
 636 snocView :: [a] -> Maybe ([a],a)
 637         -- Split off the last element
 638 snocView [] = Nothing
 639 snocView xs = go [] xs
 640             where
 641                 -- Invariant: second arg is non-empty
 642               go acc [x]    = Just (reverse acc, x)
 643               go acc (x:xs) = go (x:acc) xs
 644               go _   []     = panic "Util: snocView"
 645
 646 split :: Char -> String -> [String]
 647 split c s = case rest of
 648                 []     -> [chunk]
 649                 _:rest -> chunk : split c rest
 650   where (chunk, rest) = break (==c) s
 651 \end{code}
 652
 653
 654 %************************************************************************
 655 %*                                                                      *
 656 \subsection[Utils-comparison]{Comparisons}
 657 %*                                                                      *
 658 %************************************************************************
 659
 660 \begin{code}
 661 isEqual :: Ordering -> Bool
 662 -- Often used in (isEqual (a `compare` b))
 663 isEqual GT = False
 664 isEqual EQ = True
 665 isEqual LT = False
 666
 667 thenCmp :: Ordering -> Ordering -> Ordering
 668 {-# INLINE thenCmp #-}
 669 thenCmp EQ       ordering = ordering
 670 thenCmp ordering _        = ordering
 671
 672 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
 673 eqListBy _  []     []     = True
 674 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
 675 eqListBy _  _      _      = False
 676
 677 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
 678     -- `cmpList' uses a user-specified comparer
 679
 680 cmpList _   []     [] = EQ
 681 cmpList _   []     _  = LT
 682 cmpList _   _      [] = GT
 683 cmpList cmp (a:as) (b:bs)
 684   = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
 685 \end{code}
 686
 687 \begin{code}
 688 removeSpaces :: String -> String
 689 removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
 690 \end{code}
 691
 692 %************************************************************************
 693 %*                                                                      *
 694 \subsection{Edit distance}
 695 %*                                                                      *
 696 %************************************************************************
 697
 698 \begin{code}
 699 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
 700 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
 701 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
 702 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
 703 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
 704 --     http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
 705 restrictedDamerauLevenshteinDistance :: String -> String -> Int
 706 restrictedDamerauLevenshteinDistance str1 str2
 707   = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 708   where
 709     m = length str1
 710     n = length str2
 711
 712 restrictedDamerauLevenshteinDistanceWithLengths
 713   :: Int -> Int -> String -> String -> Int
 714 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 715   | m <= n
 716   = if n <= 32 -- n must be larger so this check is sufficient
 717     then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
 718     else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
 719
 720   | otherwise
 721   = if m <= 32 -- m must be larger so this check is sufficient
 722     then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
 723     else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
 724
 725 restrictedDamerauLevenshteinDistance'
 726   :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
 727 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
 728   | [] <- str1 = n
 729   | otherwise  = extractAnswer $
 730                  foldl' (restrictedDamerauLevenshteinDistanceWorker
 731                              (matchVectors str1) top_bit_mask vector_mask)
 732                         (0, 0, m_ones, 0, m) str2
 733   where
 734     m_ones@vector_mask = (2 ^ m) - 1
 735     top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
 736     extractAnswer (_, _, _, _, distance) = distance
 737
 738 restrictedDamerauLevenshteinDistanceWorker
 739       :: (Bits bv) => IM.IntMap bv -> bv -> bv
 740       -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
 741 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
 742                                            (pm, d0, vp, vn, distance) char2
 743   = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
 744     seq pm' $ seq d0' $ seq vp' $ seq vn' $
 745     seq distance'' $ seq char2 $
 746     (pm', d0', vp', vn', distance'')
 747   where
 748     pm' = IM.findWithDefault 0 (ord char2) str1_mvs
 749
 750     d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
 751       .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
 752           -- No need to mask the shiftL because of the restricted range of pm
 753
 754     hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
 755     hn' = d0' .&. vp
 756
 757     hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
 758     hn'_shift = (hn' `shiftL` 1) .&. vector_mask
 759     vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
 760     vn' = d0' .&. hp'_shift
 761
 762     distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
 763     distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
 764
 765 sizedComplement :: Bits bv => bv -> bv -> bv
 766 sizedComplement vector_mask vect = vector_mask `xor` vect
 767
 768 matchVectors :: Bits bv => String -> IM.IntMap bv
 769 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
 770   where
 771     go (ix, im) char = let ix' = ix + 1
 772                            im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
 773                        in seq ix' $ seq im' $ (ix', im')
 774
 775 #ifdef __GLASGOW_HASKELL__
 776 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
 777                       :: Word32 -> Int -> Int -> String -> String -> Int #-}
 778 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
 779                       :: Integer -> Int -> Int -> String -> String -> Int #-}
 780
 781 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
 782                :: IM.IntMap Word32 -> Word32 -> Word32
 783                -> (Word32, Word32, Word32, Word32, Int)
 784                -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
 785 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
 786                :: IM.IntMap Integer -> Integer -> Integer
 787                -> (Integer, Integer, Integer, Integer, Int)
 788                -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
 789
 790 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
 791 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
 792
 793 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
 794 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
 795 #endif
 796
 797 fuzzyMatch :: String -> [String] -> [String]
 798 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
 799
 800 -- | Search for possible matches to the users input in the given list,
 801 -- returning a small number of ranked results
 802 fuzzyLookup :: String -> [(String,a)] -> [a]
 803 fuzzyLookup user_entered possibilites
 804   = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
 805     [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
 806                        , let distance = restrictedDamerauLevenshteinDistance
 807                                             poss_str user_entered
 808                        , distance <= fuzzy_threshold ]
 809   where
 810     -- Work out an approriate match threshold:
 811     -- We report a candidate if its edit distance is <= the threshold,
 812     -- The threshhold is set to about a quarter of the # of characters the user entered
 813     --   Length    Threshold
 814     --     1         0          -- Don't suggest *any* candidates
 815     --     2         1          -- for single-char identifiers
 816     --     3         1
 817     --     4         1
 818     --     5         1
 819     --     6         2
 820     --
 821     fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
 822     mAX_RESULTS = 3
 823 \end{code}
 824
 825 %************************************************************************
 826 %*                                                                      *
 827 \subsection[Utils-pairs]{Pairs}
 828 %*                                                                      *
 829 %************************************************************************
 830
 831 \begin{code}
 832 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
 833 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
 834 \end{code}
 835
 836 \begin{code}
 837 seqList :: [a] -> b -> b
 838 seqList [] b = b
 839 seqList (x:xs) b = x `seq` seqList xs b
 840 \end{code}
 841
 842 Global variables:
 843
 844 \begin{code}
 845 global :: a -> IORef a
 846 global a = unsafePerformIO (newIORef a)
 847 \end{code}
 848
 849 \begin{code}
 850 consIORef :: IORef [a] -> a -> IO ()
 851 consIORef var x = do
 852   atomicModifyIORef var (\xs -> (x:xs,()))
 853 \end{code}
 854
 855 \begin{code}
 856 globalMVar :: a -> MVar a
 857 globalMVar a = unsafePerformIO (newMVar a)
 858
 859 globalEmptyMVar :: MVar a
 860 globalEmptyMVar = unsafePerformIO newEmptyMVar
 861 \end{code}
 862
 863 Module names:
 864
 865 \begin{code}
 866 looksLikeModuleName :: String -> Bool
 867 looksLikeModuleName [] = False
 868 looksLikeModuleName (c:cs) = isUpper c && go cs
 869   where go [] = True
 870         go ('.':cs) = looksLikeModuleName cs
 871         go (c:cs)   = (isAlphaNum c || c == '_' || c == '\'') && go cs
 872 \end{code}
 873
 874 Akin to @Prelude.words@, but acts like the Bourne shell, treating
 875 quoted strings as Haskell Strings, and also parses Haskell [String]
 876 syntax.
 877
 878 \begin{code}
 879 getCmd :: String -> Either String             -- Error
 880                            (String, String) -- (Cmd, Rest)
 881 getCmd s = case break isSpace $ dropWhile isSpace s of
 882            ([], _) -> Left ("Couldn't find command in " ++ show s)
 883            res -> Right res
 884
 885 toCmdArgs :: String -> Either String             -- Error
 886                               (String, [String]) -- (Cmd, Args)
 887 toCmdArgs s = case getCmd s of
 888               Left err -> Left err
 889               Right (cmd, s') -> case toArgs s' of
 890                                  Left err -> Left err
 891                                  Right args -> Right (cmd, args)
 892
 893 toArgs :: String -> Either String   -- Error
 894                            [String] -- Args
 895 toArgs str
 896     = case dropWhile isSpace str of
 897       s@('[':_) -> case reads s of
 898                    [(args, spaces)]
 899                     | all isSpace spaces ->
 900                        Right args
 901                    _ ->
 902                        Left ("Couldn't read " ++ show str ++ "as [String]")
 903       s -> toArgs' s
 904  where
 905   toArgs' s = case dropWhile isSpace s of
 906               [] -> Right []
 907               ('"' : _) -> case reads s of
 908                            [(arg, rest)]
 909                               -- rest must either be [] or start with a space
 910                             | all isSpace (take 1 rest) ->
 911                                case toArgs' rest of
 912                                Left err -> Left err
 913                                Right args -> Right (arg : args)
 914                            _ ->
 915                                Left ("Couldn't read " ++ show s ++ "as String")
 916               s' -> case break isSpace s' of
 917                     (arg, s'') -> case toArgs' s'' of
 918                                   Left err -> Left err
 919                                   Right args -> Right (arg : args)
 920 \end{code}
 921
 922 -- -----------------------------------------------------------------------------
 923 -- Floats
 924
 925 \begin{code}
 926 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
 927 readRational__ r = do
 928      (n,d,s) <- readFix r
 929      (k,t)   <- readExp s
 930      return ((n%1)*10^^(k-d), t)
 931  where
 932      readFix r = do
 933         (ds,s)  <- lexDecDigits r
 934         (ds',t) <- lexDotDigits s
 935         return (read (ds++ds'), length ds', t)
 936
 937      readExp (e:s) | e `elem` "eE" = readExp' s
 938      readExp s                     = return (0,s)
 939
 940      readExp' ('+':s) = readDec s
 941      readExp' ('-':s) = do (k,t) <- readDec s
 942                            return (-k,t)
 943      readExp' s       = readDec s
 944
 945      readDec s = do
 946         (ds,r) <- nonnull isDigit s
 947         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 948                 r)
 949
 950      lexDecDigits = nonnull isDigit
 951
 952      lexDotDigits ('.':s) = return (span isDigit s)
 953      lexDotDigits s       = return ("",s)
 954
 955      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 956                       return (cs,t)
 957
 958 readRational :: String -> Rational -- NB: *does* handle a leading "-"
 959 readRational top_s
 960   = case top_s of
 961       '-' : xs -> - (read_me xs)
 962       xs       -> read_me xs
 963   where
 964     read_me s
 965       = case (do { (x,"") <- readRational__ s ; return x }) of
 966           [x] -> x
 967           []  -> error ("readRational: no parse:"        ++ top_s)
 968           _   -> error ("readRational: ambiguous parse:" ++ top_s)
 969
 970
 971 -----------------------------------------------------------------------------
 972 -- read helpers
 973
 974 maybeReadFuzzy :: Read a => String -> Maybe a
 975 maybeReadFuzzy str = case reads str of
 976                      [(x, s)]
 977                       | all isSpace s ->
 978                          Just x
 979                      _ ->
 980                          Nothing
 981
 982 -----------------------------------------------------------------------------
 983 -- Create a hierarchy of directories
 984
 985 createDirectoryHierarchy :: FilePath -> IO ()
 986 createDirectoryHierarchy dir | isDrive dir = return () -- XXX Hack
 987 createDirectoryHierarchy dir = do
 988   b <- doesDirectoryExist dir
 989   unless b $ do createDirectoryHierarchy (takeDirectory dir)
 990                 createDirectory dir
 991
 992 -----------------------------------------------------------------------------
 993 -- Verify that the 'dirname' portion of a FilePath exists.
 994 --
 995 doesDirNameExist :: FilePath -> IO Bool
 996 doesDirNameExist fpath = case takeDirectory fpath of
 997                          "" -> return True -- XXX Hack
 998                          _  -> doesDirectoryExist (takeDirectory fpath)
 999
1000 -- --------------------------------------------------------------
1001 -- check existence & modification time at the same time
1002
1003 modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime)
1004 modificationTimeIfExists f = do
1005   (do t <- getModificationTime f; return (Just t))
1006         `catchIO` \e -> if isDoesNotExistError e
1007                         then return Nothing
1008                         else ioError e
1009
1010 -- split a string at the last character where 'pred' is True,
1011 -- returning a pair of strings. The first component holds the string
1012 -- up (but not including) the last character for which 'pred' returned
1013 -- True, the second whatever comes after (but also not including the
1014 -- last character).
1015 --
1016 -- If 'pred' returns False for all characters in the string, the original
1017 -- string is returned in the first component (and the second one is just
1018 -- empty).
1019 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
1020 splitLongestPrefix str pred
1021   | null r_pre = (str,           [])
1022   | otherwise  = (reverse (tail r_pre), reverse r_suf)
1023                            -- 'tail' drops the char satisfying 'pred'
1024   where (r_suf, r_pre) = break pred (reverse str)
1025
1026 escapeSpaces :: String -> String
1027 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
1028
1029 type Suffix = String
1030
1031 --------------------------------------------------------------
1032 -- * Search path
1033 --------------------------------------------------------------
1034
1035 -- | The function splits the given string to substrings
1036 -- using the 'searchPathSeparator'.
1037 parseSearchPath :: String -> [FilePath]
1038 parseSearchPath path = split path
1039   where
1040     split :: String -> [String]
1041     split s =
1042       case rest' of
1043         []     -> [chunk]
1044         _:rest -> chunk : split rest
1045       where
1046         chunk =
1047           case chunk' of
1048 #ifdef mingw32_HOST_OS
1049             ('\"':xs@(_:_)) | last xs == '\"' -> init xs
1050 #endif
1051             _                                 -> chunk'
1052
1053         (chunk', rest') = break isSearchPathSeparator s
1054
1055 data Direction = Forwards | Backwards
1056
1057 reslash :: Direction -> FilePath -> FilePath
1058 reslash d = f
1059     where f ('/'  : xs) = slash : f xs
1060           f ('\\' : xs) = slash : f xs
1061           f (x    : xs) = x     : f xs
1062           f ""          = ""
1063           slash = case d of
1064                   Forwards -> '/'
1065                   Backwards -> '\\'
1066 \end{code}
1067
1068 %************************************************************************
1069 %*                                                                      *
1070 \subsection[Utils-Data]{Utils for defining Data instances}
1071 %*                                                                      *
1072 %************************************************************************
1073
1074 These functions helps us to define Data instances for abstract types.
1075
1076 \begin{code}
1077 abstractConstr :: String -> Constr
1078 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1079 \end{code}
1080
1081 \begin{code}
1082 abstractDataType :: String -> DataType
1083 abstractDataType n = mkDataType n [abstractConstr n]
1084 \end{code}
1085
1086 %************************************************************************
1087 %*                                                                      *
1088 \subsection[Utils-C]{Utils for printing C code}
1089 %*                                                                      *
1090 %************************************************************************
1091
1092 \begin{code}
1093 charToC :: Word8 -> String
1094 charToC w =
1095   case chr (fromIntegral w) of
1096         '\"' -> "\\\""
1097         '\'' -> "\\\'"
1098         '\\' -> "\\\\"
1099         c | c >= ' ' && c <= '~' -> [c]
1100           | otherwise -> ['\\',
1101                          chr (ord '0' + ord c `div` 64),
1102                          chr (ord '0' + ord c `div` 8 `mod` 8),
1103                          chr (ord '0' + ord c         `mod` 8)]
1104 \end{code}