compiler/utils/Util.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The University of Glasgow 1992-2002
   4 %
   5
   6 \begin{code}
   7 -- | Highly random utility functions
   8 module Util (
   9         -- * Flags dependent on the compiler build
  10         ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib,
  11         isWindowsHost, isWindowsTarget, isDarwinTarget,
  12
  13         -- * General list processing
  14         zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
  15         zipLazy, stretchZipWith,
  16
  17         unzipWith,
  18
  19         mapFst, mapSnd,
  20         mapAndUnzip, mapAndUnzip3,
  21         nOfThem, filterOut, partitionWith, splitEithers,
  22
  23         foldl1', foldl2, count, all2,
  24
  25         lengthExceeds, lengthIs, lengthAtLeast,
  26         listLengthCmp, atLength, equalLength, compareLength,
  27
  28         isSingleton, only, singleton,
  29         notNull, snocView,
  30
  31         isIn, isn'tIn,
  32
  33         -- * Tuples
  34         fstOf3, sndOf3, thirdOf3,
  35
  36         -- * List operations controlled by another list
  37         takeList, dropList, splitAtList, split,
  38         dropTail,
  39
  40         -- * For loop
  41         nTimes,
  42
  43         -- * Sorting
  44         sortLe, sortWith, on,
  45
  46         -- * Comparisons
  47         isEqual, eqListBy,
  48         thenCmp, cmpList,
  49         removeSpaces,
  50
  51         -- * Edit distance
  52         fuzzyMatch, fuzzyLookup,
  53
  54         -- * Transitive closures
  55         transitiveClosure,
  56
  57         -- * Strictness
  58         seqList,
  59
  60         -- * Module names
  61         looksLikeModuleName,
  62
  63         -- * Argument processing
  64         getCmd, toCmdArgs, toArgs,
  65
  66         -- * Floating point
  67         readRational,
  68
  69         -- * IO-ish utilities
  70         createDirectoryHierarchy,
  71         doesDirNameExist,
  72         modificationTimeIfExists,
  73
  74         global, consIORef, globalMVar, globalEmptyMVar,
  75
  76         -- * Filenames and paths
  77         Suffix,
  78         splitLongestPrefix,
  79         escapeSpaces,
  80         parseSearchPath,
  81         Direction(..), reslash,
  82
  83         -- * Utils for defining Data instances
  84         abstractConstr, abstractDataType, mkNoRepType
  85     ) where
  86
  87 #include "HsVersions.h"
  88
  89 import Exception
  90 import Panic
  91
  92 import Data.Data
  93 import Data.IORef       ( IORef, newIORef, atomicModifyIORef )
  94 import System.IO.Unsafe ( unsafePerformIO )
  95 import Data.List        hiding (group)
  96 import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar )
  97
  98 #ifdef DEBUG
  99 import FastTypes
 100 #endif
 101
 102 import Control.Monad    ( unless )
 103 import System.IO.Error as IO ( isDoesNotExistError )
 104 import System.Directory ( doesDirectoryExist, createDirectory,
 105                           getModificationTime )
 106 import System.FilePath
 107 import System.Time      ( ClockTime )
 108
 109 import Data.Char        ( isUpper, isAlphaNum, isSpace, ord, isDigit )
 110 import Data.Ratio       ( (%) )
 111 import Data.Ord         ( comparing )
 112 import Data.Bits
 113 import Data.Word
 114 import qualified Data.IntMap as IM
 115
 116 infixr 9 `thenCmp`
 117 \end{code}
 118
 119 %************************************************************************
 120 %*                                                                      *
 121 \subsection{Is DEBUG on, are we on Windows, etc?}
 122 %*                                                                      *
 123 %************************************************************************
 124
 125 These booleans are global constants, set by CPP flags.  They allow us to
 126 recompile a single module (this one) to change whether or not debug output
 127 appears. They sometimes let us avoid even running CPP elsewhere.
 128
 129 It's important that the flags are literal constants (True/False). Then,
 130 with -0, tests of the flags in other modules will simplify to the correct
 131 branch of the conditional, thereby dropping debug code altogether when
 132 the flags are off.
 133
 134 \begin{code}
 135 ghciSupported :: Bool
 136 #ifdef GHCI
 137 ghciSupported = True
 138 #else
 139 ghciSupported = False
 140 #endif
 141
 142 debugIsOn :: Bool
 143 #ifdef DEBUG
 144 debugIsOn = True
 145 #else
 146 debugIsOn = False
 147 #endif
 148
 149 ghciTablesNextToCode :: Bool
 150 #ifdef GHCI_TABLES_NEXT_TO_CODE
 151 ghciTablesNextToCode = True
 152 #else
 153 ghciTablesNextToCode = False
 154 #endif
 155
 156 isDynamicGhcLib :: Bool
 157 #ifdef DYNAMIC
 158 isDynamicGhcLib = True
 159 #else
 160 isDynamicGhcLib = False
 161 #endif
 162
 163 isWindowsHost :: Bool
 164 #ifdef mingw32_HOST_OS
 165 isWindowsHost = True
 166 #else
 167 isWindowsHost = False
 168 #endif
 169
 170 isWindowsTarget :: Bool
 171 #ifdef mingw32_TARGET_OS
 172 isWindowsTarget = True
 173 #else
 174 isWindowsTarget = False
 175 #endif
 176
 177 isDarwinTarget :: Bool
 178 #ifdef darwin_TARGET_OS
 179 isDarwinTarget = True
 180 #else
 181 isDarwinTarget = False
 182 #endif
 183 \end{code}
 184
 185 %************************************************************************
 186 %*                                                                      *
 187 \subsection{A for loop}
 188 %*                                                                      *
 189 %************************************************************************
 190
 191 \begin{code}
 192 -- | Compose a function with itself n times.  (nth rather than twice)
 193 nTimes :: Int -> (a -> a) -> (a -> a)
 194 nTimes 0 _ = id
 195 nTimes 1 f = f
 196 nTimes n f = f . nTimes (n-1) f
 197 \end{code}
 198
 199 \begin{code}
 200 fstOf3   :: (a,b,c) -> a
 201 sndOf3   :: (a,b,c) -> b
 202 thirdOf3 :: (a,b,c) -> c
 203 fstOf3      (a,_,_) =  a
 204 sndOf3      (_,b,_) =  b
 205 thirdOf3    (_,_,c) =  c
 206 \end{code}
 207
 208 %************************************************************************
 209 %*                                                                      *
 210 \subsection[Utils-lists]{General list processing}
 211 %*                                                                      *
 212 %************************************************************************
 213
 214 \begin{code}
 215 filterOut :: (a->Bool) -> [a] -> [a]
 216 -- ^ Like filter, only it reverses the sense of the test
 217 filterOut _ [] = []
 218 filterOut p (x:xs) | p x       = filterOut p xs
 219                    | otherwise = x : filterOut p xs
 220
 221 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
 222 -- ^ Uses a function to determine which of two output lists an input element should join
 223 partitionWith _ [] = ([],[])
 224 partitionWith f (x:xs) = case f x of
 225                          Left  b -> (b:bs, cs)
 226                          Right c -> (bs, c:cs)
 227     where (bs,cs) = partitionWith f xs
 228
 229 splitEithers :: [Either a b] -> ([a], [b])
 230 -- ^ Teases a list of 'Either's apart into two lists
 231 splitEithers [] = ([],[])
 232 splitEithers (e : es) = case e of
 233                         Left x -> (x:xs, ys)
 234                         Right y -> (xs, y:ys)
 235     where (xs,ys) = splitEithers es
 236 \end{code}
 237
 238 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
 239 are of equal length.  Alastair Reid thinks this should only happen if
 240 DEBUGging on; hey, why not?
 241
 242 \begin{code}
 243 zipEqual        :: String -> [a] -> [b] -> [(a,b)]
 244 zipWithEqual    :: String -> (a->b->c) -> [a]->[b]->[c]
 245 zipWith3Equal   :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
 246 zipWith4Equal   :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
 247
 248 #ifndef DEBUG
 249 zipEqual      _ = zip
 250 zipWithEqual  _ = zipWith
 251 zipWith3Equal _ = zipWith3
 252 zipWith4Equal _ = zipWith4
 253 #else
 254 zipEqual _   []     []     = []
 255 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
 256 zipEqual msg _      _      = panic ("zipEqual: unequal lists:"++msg)
 257
 258 zipWithEqual msg z (a:as) (b:bs)=  z a b : zipWithEqual msg z as bs
 259 zipWithEqual _   _ [] []        =  []
 260 zipWithEqual msg _ _ _          =  panic ("zipWithEqual: unequal lists:"++msg)
 261
 262 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
 263                                 =  z a b c : zipWith3Equal msg z as bs cs
 264 zipWith3Equal _   _ [] []  []   =  []
 265 zipWith3Equal msg _ _  _   _    =  panic ("zipWith3Equal: unequal lists:"++msg)
 266
 267 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
 268                                 =  z a b c d : zipWith4Equal msg z as bs cs ds
 269 zipWith4Equal _   _ [] [] [] [] =  []
 270 zipWith4Equal msg _ _  _  _  _  =  panic ("zipWith4Equal: unequal lists:"++msg)
 271 #endif
 272 \end{code}
 273
 274 \begin{code}
 275 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
 276 zipLazy :: [a] -> [b] -> [(a,b)]
 277 zipLazy []     _       = []
 278 -- We want to write this, but with GHC 6.4 we get a warning, so it
 279 -- doesn't validate:
 280 -- zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
 281 -- so we write this instead:
 282 zipLazy (x:xs) zs = let y : ys = zs
 283                     in (x,y) : zipLazy xs ys
 284 \end{code}
 285
 286
 287 \begin{code}
 288 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
 289 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
 290 -- the places where @p@ returns @True@
 291
 292 stretchZipWith _ _ _ []     _ = []
 293 stretchZipWith p z f (x:xs) ys
 294   | p x       = f x z : stretchZipWith p z f xs ys
 295   | otherwise = case ys of
 296                 []     -> []
 297                 (y:ys) -> f x y : stretchZipWith p z f xs ys
 298 \end{code}
 299
 300
 301 \begin{code}
 302 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
 303 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
 304
 305 mapFst f xys = [(f x, y) | (x,y) <- xys]
 306 mapSnd f xys = [(x, f y) | (x,y) <- xys]
 307
 308 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
 309
 310 mapAndUnzip _ [] = ([], [])
 311 mapAndUnzip f (x:xs)
 312   = let (r1,  r2)  = f x
 313         (rs1, rs2) = mapAndUnzip f xs
 314     in
 315     (r1:rs1, r2:rs2)
 316
 317 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
 318
 319 mapAndUnzip3 _ [] = ([], [], [])
 320 mapAndUnzip3 f (x:xs)
 321   = let (r1,  r2,  r3)  = f x
 322         (rs1, rs2, rs3) = mapAndUnzip3 f xs
 323     in
 324     (r1:rs1, r2:rs2, r3:rs3)
 325 \end{code}
 326
 327 \begin{code}
 328 nOfThem :: Int -> a -> [a]
 329 nOfThem n thing = replicate n thing
 330
 331 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
 332 --
 333 -- @
 334 --  atLength atLenPred atEndPred ls n
 335 --   | n < 0         = atLenPred n
 336 --   | length ls < n = atEndPred (n - length ls)
 337 --   | otherwise     = atLenPred (drop n ls)
 338 -- @
 339 atLength :: ([a] -> b)
 340          -> (Int -> b)
 341          -> [a]
 342          -> Int
 343          -> b
 344 atLength atLenPred atEndPred ls n
 345   | n < 0     = atEndPred n
 346   | otherwise = go n ls
 347   where
 348     go n [] = atEndPred n
 349     go 0 ls = atLenPred ls
 350     go n (_:xs) = go (n-1) xs
 351
 352 -- Some special cases of atLength:
 353
 354 lengthExceeds :: [a] -> Int -> Bool
 355 -- ^ > (lengthExceeds xs n) = (length xs > n)
 356 lengthExceeds = atLength notNull (const False)
 357
 358 lengthAtLeast :: [a] -> Int -> Bool
 359 lengthAtLeast = atLength notNull (== 0)
 360
 361 lengthIs :: [a] -> Int -> Bool
 362 lengthIs = atLength null (==0)
 363
 364 listLengthCmp :: [a] -> Int -> Ordering
 365 listLengthCmp = atLength atLen atEnd
 366  where
 367   atEnd 0      = EQ
 368   atEnd x
 369    | x > 0     = LT -- not yet seen 'n' elts, so list length is < n.
 370    | otherwise = GT
 371
 372   atLen []     = EQ
 373   atLen _      = GT
 374
 375 equalLength :: [a] -> [b] -> Bool
 376 equalLength []     []     = True
 377 equalLength (_:xs) (_:ys) = equalLength xs ys
 378 equalLength _      _      = False
 379
 380 compareLength :: [a] -> [b] -> Ordering
 381 compareLength []     []     = EQ
 382 compareLength (_:xs) (_:ys) = compareLength xs ys
 383 compareLength []     _      = LT
 384 compareLength _      []     = GT
 385
 386 ----------------------------
 387 singleton :: a -> [a]
 388 singleton x = [x]
 389
 390 isSingleton :: [a] -> Bool
 391 isSingleton [_] = True
 392 isSingleton _   = False
 393
 394 notNull :: [a] -> Bool
 395 notNull [] = False
 396 notNull _  = True
 397
 398 only :: [a] -> a
 399 #ifdef DEBUG
 400 only [a] = a
 401 #else
 402 only (a:_) = a
 403 #endif
 404 only _ = panic "Util: only"
 405 \end{code}
 406
 407 Debugging/specialising versions of \tr{elem} and \tr{notElem}
 408
 409 \begin{code}
 410 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
 411
 412 # ifndef DEBUG
 413 isIn    _msg x ys = x `elem` ys
 414 isn'tIn _msg x ys = x `notElem` ys
 415
 416 # else /* DEBUG */
 417 isIn msg x ys
 418   = elem100 (_ILIT(0)) x ys
 419   where
 420     elem100 _ _ []        = False
 421     elem100 i x (y:ys)
 422       | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
 423                                 (x `elem` (y:ys))
 424       | otherwise       = x == y || elem100 (i +# _ILIT(1)) x ys
 425
 426 isn'tIn msg x ys
 427   = notElem100 (_ILIT(0)) x ys
 428   where
 429     notElem100 _ _ [] =  True
 430     notElem100 i x (y:ys)
 431       | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
 432                                 (x `notElem` (y:ys))
 433       | otherwise      =  x /= y && notElem100 (i +# _ILIT(1)) x ys
 434 # endif /* DEBUG */
 435 \end{code}
 436
 437 %************************************************************************
 438 %*                                                                      *
 439 \subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten}
 440 %*                                                                      *
 441 %************************************************************************
 442
 443 \begin{display}
 444 Date: Mon, 3 May 93 20:45:23 +0200
 445 From: Carsten Kehler Holst <kehler@cs.chalmers.se>
 446 To: partain@dcs.gla.ac.uk
 447 Subject: natural merge sort beats quick sort [ and it is prettier ]
 448
 449 Here is a piece of Haskell code that I'm rather fond of. See it as an
 450 attempt to get rid of the ridiculous quick-sort routine. group is
 451 quite useful by itself I think it was John's idea originally though I
 452 believe the lazy version is due to me [surprisingly complicated].
 453 gamma [used to be called] is called gamma because I got inspired by
 454 the Gamma calculus. It is not very close to the calculus but does
 455 behave less sequentially than both foldr and foldl. One could imagine
 456 a version of gamma that took a unit element as well thereby avoiding
 457 the problem with empty lists.
 458
 459 I've tried this code against
 460
 461    1) insertion sort - as provided by haskell
 462    2) the normal implementation of quick sort
 463    3) a deforested version of quick sort due to Jan Sparud
 464    4) a super-optimized-quick-sort of Lennart's
 465
 466 If the list is partially sorted both merge sort and in particular
 467 natural merge sort wins. If the list is random [ average length of
 468 rising subsequences = approx 2 ] mergesort still wins and natural
 469 merge sort is marginally beaten by Lennart's soqs. The space
 470 consumption of merge sort is a bit worse than Lennart's quick sort
 471 approx a factor of 2. And a lot worse if Sparud's bug-fix [see his
 472 fpca article ] isn't used because of group.
 473
 474 have fun
 475 Carsten
 476 \end{display}
 477
 478 \begin{code}
 479 group :: (a -> a -> Bool) -> [a] -> [[a]]
 480 -- Given a <= function, group finds maximal contiguous up-runs
 481 -- or down-runs in the input list.
 482 -- It's stable, in the sense that it never re-orders equal elements
 483 --
 484 -- Date: Mon, 12 Feb 1996 15:09:41 +0000
 485 -- From: Andy Gill <andy@dcs.gla.ac.uk>
 486 -- Here is a `better' definition of group.
 487
 488 group _ []     = []
 489 group p (x:xs) = group' xs x x (x :)
 490   where
 491     group' []     _     _     s  = [s []]
 492     group' (x:xs) x_min x_max s
 493         |      x_max `p` x  = group' xs x_min x     (s . (x :))
 494         | not (x_min `p` x) = group' xs x     x_max ((x :) . s)
 495         | otherwise         = s [] : group' xs x x (x :)
 496         -- NB: the 'not' is essential for stablity
 497         --     x `p` x_min would reverse equal elements
 498
 499 generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a]
 500 generalMerge _ xs [] = xs
 501 generalMerge _ [] ys = ys
 502 generalMerge p (x:xs) (y:ys) | x `p` y   = x : generalMerge p xs     (y:ys)
 503                              | otherwise = y : generalMerge p (x:xs) ys
 504
 505 -- gamma is now called balancedFold
 506
 507 balancedFold :: (a -> a -> a) -> [a] -> a
 508 balancedFold _ [] = error "can't reduce an empty list using balancedFold"
 509 balancedFold _ [x] = x
 510 balancedFold f l  = balancedFold f (balancedFold' f l)
 511
 512 balancedFold' :: (a -> a -> a) -> [a] -> [a]
 513 balancedFold' f (x:y:xs) = f x y : balancedFold' f xs
 514 balancedFold' _ xs = xs
 515
 516 generalNaturalMergeSort :: (a -> a -> Bool) -> [a] -> [a]
 517 generalNaturalMergeSort _ [] = []
 518 generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs
 519
 520 #if NOT_USED
 521 generalMergeSort p [] = []
 522 generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs
 523
 524 mergeSort, naturalMergeSort :: Ord a => [a] -> [a]
 525
 526 mergeSort = generalMergeSort (<=)
 527 naturalMergeSort = generalNaturalMergeSort (<=)
 528
 529 mergeSortLe le = generalMergeSort le
 530 #endif
 531
 532 sortLe :: (a->a->Bool) -> [a] -> [a]
 533 sortLe le = generalNaturalMergeSort le
 534
 535 sortWith :: Ord b => (a->b) -> [a] -> [a]
 536 sortWith get_key xs = sortLe le xs
 537   where
 538     x `le` y = get_key x < get_key y
 539
 540 on :: (a -> a -> c) -> (b -> a) -> b -> b -> c
 541 on cmp sel = \x y -> sel x `cmp` sel y
 542
 543 \end{code}
 544
 545 %************************************************************************
 546 %*                                                                      *
 547 \subsection[Utils-transitive-closure]{Transitive closure}
 548 %*                                                                      *
 549 %************************************************************************
 550
 551 This algorithm for transitive closure is straightforward, albeit quadratic.
 552
 553 \begin{code}
 554 transitiveClosure :: (a -> [a])         -- Successor function
 555                   -> (a -> a -> Bool)   -- Equality predicate
 556                   -> [a]
 557                   -> [a]                -- The transitive closure
 558
 559 transitiveClosure succ eq xs
 560  = go [] xs
 561  where
 562    go done []                      = done
 563    go done (x:xs) | x `is_in` done = go done xs
 564                   | otherwise      = go (x:done) (succ x ++ xs)
 565
 566    _ `is_in` []                 = False
 567    x `is_in` (y:ys) | eq x y    = True
 568                     | otherwise = x `is_in` ys
 569 \end{code}
 570
 571 %************************************************************************
 572 %*                                                                      *
 573 \subsection[Utils-accum]{Accumulating}
 574 %*                                                                      *
 575 %************************************************************************
 576
 577 A combination of foldl with zip.  It works with equal length lists.
 578
 579 \begin{code}
 580 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
 581 foldl2 _ z [] [] = z
 582 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
 583 foldl2 _ _ _      _      = panic "Util: foldl2"
 584
 585 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
 586 -- True if the lists are the same length, and
 587 -- all corresponding elements satisfy the predicate
 588 all2 _ []     []     = True
 589 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
 590 all2 _ _      _      = False
 591 \end{code}
 592
 593 Count the number of times a predicate is true
 594
 595 \begin{code}
 596 count :: (a -> Bool) -> [a] -> Int
 597 count _ [] = 0
 598 count p (x:xs) | p x       = 1 + count p xs
 599                | otherwise = count p xs
 600 \end{code}
 601
 602 @splitAt@, @take@, and @drop@ but with length of another
 603 list giving the break-off point:
 604
 605 \begin{code}
 606 takeList :: [b] -> [a] -> [a]
 607 takeList [] _ = []
 608 takeList (_:xs) ls =
 609    case ls of
 610      [] -> []
 611      (y:ys) -> y : takeList xs ys
 612
 613 dropList :: [b] -> [a] -> [a]
 614 dropList [] xs    = xs
 615 dropList _  xs@[] = xs
 616 dropList (_:xs) (_:ys) = dropList xs ys
 617
 618
 619 splitAtList :: [b] -> [a] -> ([a], [a])
 620 splitAtList [] xs     = ([], xs)
 621 splitAtList _ xs@[]   = (xs, xs)
 622 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
 623     where
 624       (ys', ys'') = splitAtList xs ys
 625
 626 -- drop from the end of a list
 627 dropTail :: Int -> [a] -> [a]
 628 dropTail n = reverse . drop n . reverse
 629
 630 snocView :: [a] -> Maybe ([a],a)
 631         -- Split off the last element
 632 snocView [] = Nothing
 633 snocView xs = go [] xs
 634             where
 635                 -- Invariant: second arg is non-empty
 636               go acc [x]    = Just (reverse acc, x)
 637               go acc (x:xs) = go (x:acc) xs
 638               go _   []     = panic "Util: snocView"
 639
 640 split :: Char -> String -> [String]
 641 split c s = case rest of
 642                 []     -> [chunk]
 643                 _:rest -> chunk : split c rest
 644   where (chunk, rest) = break (==c) s
 645 \end{code}
 646
 647
 648 %************************************************************************
 649 %*                                                                      *
 650 \subsection[Utils-comparison]{Comparisons}
 651 %*                                                                      *
 652 %************************************************************************
 653
 654 \begin{code}
 655 isEqual :: Ordering -> Bool
 656 -- Often used in (isEqual (a `compare` b))
 657 isEqual GT = False
 658 isEqual EQ = True
 659 isEqual LT = False
 660
 661 thenCmp :: Ordering -> Ordering -> Ordering
 662 {-# INLINE thenCmp #-}
 663 thenCmp EQ       ordering = ordering
 664 thenCmp ordering _        = ordering
 665
 666 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
 667 eqListBy _  []     []     = True
 668 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
 669 eqListBy _  _      _      = False
 670
 671 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
 672     -- `cmpList' uses a user-specified comparer
 673
 674 cmpList _   []     [] = EQ
 675 cmpList _   []     _  = LT
 676 cmpList _   _      [] = GT
 677 cmpList cmp (a:as) (b:bs)
 678   = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
 679 \end{code}
 680
 681 \begin{code}
 682 removeSpaces :: String -> String
 683 removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
 684 \end{code}
 685
 686 %************************************************************************
 687 %*                                                                      *
 688 \subsection{Edit distance}
 689 %*                                                                      *
 690 %************************************************************************
 691
 692 \begin{code}
 693 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
 694 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
 695 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
 696 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
 697 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
 698 --     http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
 699 restrictedDamerauLevenshteinDistance :: String -> String -> Int
 700 restrictedDamerauLevenshteinDistance str1 str2
 701   = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 702   where
 703     m = length str1
 704     n = length str2
 705
 706 restrictedDamerauLevenshteinDistanceWithLengths
 707   :: Int -> Int -> String -> String -> Int
 708 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 709   | m <= n
 710   = if n <= 32 -- n must be larger so this check is sufficient
 711     then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
 712     else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
 713
 714   | otherwise
 715   = if m <= 32 -- m must be larger so this check is sufficient
 716     then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
 717     else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
 718
 719 restrictedDamerauLevenshteinDistance'
 720   :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
 721 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
 722   | [] <- str1 = n
 723   | otherwise  = extractAnswer $
 724                  foldl' (restrictedDamerauLevenshteinDistanceWorker
 725                              (matchVectors str1) top_bit_mask vector_mask)
 726                         (0, 0, m_ones, 0, m) str2
 727   where
 728     m_ones@vector_mask = (2 ^ m) - 1
 729     top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
 730     extractAnswer (_, _, _, _, distance) = distance
 731
 732 restrictedDamerauLevenshteinDistanceWorker
 733       :: (Bits bv) => IM.IntMap bv -> bv -> bv
 734       -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
 735 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
 736                                            (pm, d0, vp, vn, distance) char2
 737   = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
 738     seq pm' $ seq d0' $ seq vp' $ seq vn' $
 739     seq distance'' $ seq char2 $
 740     (pm', d0', vp', vn', distance'')
 741   where
 742     pm' = IM.findWithDefault 0 (ord char2) str1_mvs
 743
 744     d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
 745       .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
 746           -- No need to mask the shiftL because of the restricted range of pm
 747
 748     hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
 749     hn' = d0' .&. vp
 750
 751     hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
 752     hn'_shift = (hn' `shiftL` 1) .&. vector_mask
 753     vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
 754     vn' = d0' .&. hp'_shift
 755
 756     distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
 757     distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
 758
 759 sizedComplement :: Bits bv => bv -> bv -> bv
 760 sizedComplement vector_mask vect = vector_mask `xor` vect
 761
 762 matchVectors :: Bits bv => String -> IM.IntMap bv
 763 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
 764   where
 765     go (ix, im) char = let ix' = ix + 1
 766                            im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
 767                        in seq ix' $ seq im' $ (ix', im')
 768
 769 #ifdef __GLASGOW_HASKELL__
 770 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
 771                       :: Word32 -> Int -> Int -> String -> String -> Int #-}
 772 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
 773                       :: Integer -> Int -> Int -> String -> String -> Int #-}
 774
 775 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
 776                :: IM.IntMap Word32 -> Word32 -> Word32
 777                -> (Word32, Word32, Word32, Word32, Int)
 778                -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
 779 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
 780                :: IM.IntMap Integer -> Integer -> Integer
 781                -> (Integer, Integer, Integer, Integer, Int)
 782                -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
 783
 784 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
 785 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
 786
 787 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
 788 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
 789 #endif
 790
 791 fuzzyMatch :: String -> [String] -> [String]
 792 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
 793
 794 -- | Search for possible matches to the users input in the given list,
 795 -- returning a small number of ranked results
 796 fuzzyLookup :: String -> [(String,a)] -> [a]
 797 fuzzyLookup user_entered possibilites
 798   = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
 799     [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
 800                        , let distance = restrictedDamerauLevenshteinDistance
 801                                             poss_str user_entered
 802                        , distance <= fuzzy_threshold ]
 803   where
 804     -- Work out an approriate match threshold
 805     -- (about a quarter of the # of characters the user entered)
 806     fuzzy_threshold = max (round $ fromInteger (genericLength user_entered) / (4 :: Rational)) 1
 807     mAX_RESULTS = 3
 808 \end{code}
 809
 810 %************************************************************************
 811 %*                                                                      *
 812 \subsection[Utils-pairs]{Pairs}
 813 %*                                                                      *
 814 %************************************************************************
 815
 816 \begin{code}
 817 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
 818 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
 819 \end{code}
 820
 821 \begin{code}
 822 seqList :: [a] -> b -> b
 823 seqList [] b = b
 824 seqList (x:xs) b = x `seq` seqList xs b
 825 \end{code}
 826
 827 Global variables:
 828
 829 \begin{code}
 830 global :: a -> IORef a
 831 global a = unsafePerformIO (newIORef a)
 832 \end{code}
 833
 834 \begin{code}
 835 consIORef :: IORef [a] -> a -> IO ()
 836 consIORef var x = do
 837   atomicModifyIORef var (\xs -> (x:xs,()))
 838 \end{code}
 839
 840 \begin{code}
 841 globalMVar :: a -> MVar a
 842 globalMVar a = unsafePerformIO (newMVar a)
 843
 844 globalEmptyMVar :: MVar a
 845 globalEmptyMVar = unsafePerformIO newEmptyMVar
 846 \end{code}
 847
 848 Module names:
 849
 850 \begin{code}
 851 looksLikeModuleName :: String -> Bool
 852 looksLikeModuleName [] = False
 853 looksLikeModuleName (c:cs) = isUpper c && go cs
 854   where go [] = True
 855         go ('.':cs) = looksLikeModuleName cs
 856         go (c:cs)   = (isAlphaNum c || c == '_' || c == '\'') && go cs
 857 \end{code}
 858
 859 Akin to @Prelude.words@, but acts like the Bourne shell, treating
 860 quoted strings as Haskell Strings, and also parses Haskell [String]
 861 syntax.
 862
 863 \begin{code}
 864 getCmd :: String -> Either String             -- Error
 865                            (String, String) -- (Cmd, Rest)
 866 getCmd s = case break isSpace $ dropWhile isSpace s of
 867            ([], _) -> Left ("Couldn't find command in " ++ show s)
 868            res -> Right res
 869
 870 toCmdArgs :: String -> Either String             -- Error
 871                               (String, [String]) -- (Cmd, Args)
 872 toCmdArgs s = case getCmd s of
 873               Left err -> Left err
 874               Right (cmd, s') -> case toArgs s' of
 875                                  Left err -> Left err
 876                                  Right args -> Right (cmd, args)
 877
 878 toArgs :: String -> Either String   -- Error
 879                            [String] -- Args
 880 toArgs str
 881     = case dropWhile isSpace str of
 882       s@('[':_) -> case reads s of
 883                    [(args, spaces)]
 884                     | all isSpace spaces ->
 885                        Right args
 886                    _ ->
 887                        Left ("Couldn't read " ++ show str ++ "as [String]")
 888       s -> toArgs' s
 889  where
 890   toArgs' s = case dropWhile isSpace s of
 891               [] -> Right []
 892               ('"' : _) -> case reads s of
 893                            [(arg, rest)]
 894                               -- rest must either be [] or start with a space
 895                             | all isSpace (take 1 rest) ->
 896                                case toArgs' rest of
 897                                Left err -> Left err
 898                                Right args -> Right (arg : args)
 899                            _ ->
 900                                Left ("Couldn't read " ++ show s ++ "as String")
 901               s' -> case break isSpace s' of
 902                     (arg, s'') -> case toArgs' s'' of
 903                                   Left err -> Left err
 904                                   Right args -> Right (arg : args)
 905 \end{code}
 906
 907 -- -----------------------------------------------------------------------------
 908 -- Floats
 909
 910 \begin{code}
 911 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
 912 readRational__ r = do
 913      (n,d,s) <- readFix r
 914      (k,t)   <- readExp s
 915      return ((n%1)*10^^(k-d), t)
 916  where
 917      readFix r = do
 918         (ds,s)  <- lexDecDigits r
 919         (ds',t) <- lexDotDigits s
 920         return (read (ds++ds'), length ds', t)
 921
 922      readExp (e:s) | e `elem` "eE" = readExp' s
 923      readExp s                     = return (0,s)
 924
 925      readExp' ('+':s) = readDec s
 926      readExp' ('-':s) = do (k,t) <- readDec s
 927                            return (-k,t)
 928      readExp' s       = readDec s
 929
 930      readDec s = do
 931         (ds,r) <- nonnull isDigit s
 932         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 933                 r)
 934
 935      lexDecDigits = nonnull isDigit
 936
 937      lexDotDigits ('.':s) = return (span isDigit s)
 938      lexDotDigits s       = return ("",s)
 939
 940      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 941                       return (cs,t)
 942
 943 readRational :: String -> Rational -- NB: *does* handle a leading "-"
 944 readRational top_s
 945   = case top_s of
 946       '-' : xs -> - (read_me xs)
 947       xs       -> read_me xs
 948   where
 949     read_me s
 950       = case (do { (x,"") <- readRational__ s ; return x }) of
 951           [x] -> x
 952           []  -> error ("readRational: no parse:"        ++ top_s)
 953           _   -> error ("readRational: ambiguous parse:" ++ top_s)
 954
 955
 956 -----------------------------------------------------------------------------
 957 -- Create a hierarchy of directories
 958
 959 createDirectoryHierarchy :: FilePath -> IO ()
 960 createDirectoryHierarchy dir | isDrive dir = return () -- XXX Hack
 961 createDirectoryHierarchy dir = do
 962   b <- doesDirectoryExist dir
 963   unless b $ do createDirectoryHierarchy (takeDirectory dir)
 964                 createDirectory dir
 965
 966 -----------------------------------------------------------------------------
 967 -- Verify that the 'dirname' portion of a FilePath exists.
 968 --
 969 doesDirNameExist :: FilePath -> IO Bool
 970 doesDirNameExist fpath = case takeDirectory fpath of
 971                          "" -> return True -- XXX Hack
 972                          _  -> doesDirectoryExist (takeDirectory fpath)
 973
 974 -- --------------------------------------------------------------
 975 -- check existence & modification time at the same time
 976
 977 modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime)
 978 modificationTimeIfExists f = do
 979   (do t <- getModificationTime f; return (Just t))
 980         `catchIO` \e -> if isDoesNotExistError e
 981                         then return Nothing
 982                         else ioError e
 983
 984 -- split a string at the last character where 'pred' is True,
 985 -- returning a pair of strings. The first component holds the string
 986 -- up (but not including) the last character for which 'pred' returned
 987 -- True, the second whatever comes after (but also not including the
 988 -- last character).
 989 --
 990 -- If 'pred' returns False for all characters in the string, the original
 991 -- string is returned in the first component (and the second one is just
 992 -- empty).
 993 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
 994 splitLongestPrefix str pred
 995   | null r_pre = (str,           [])
 996   | otherwise  = (reverse (tail r_pre), reverse r_suf)
 997                            -- 'tail' drops the char satisfying 'pred'
 998   where (r_suf, r_pre) = break pred (reverse str)
 999
1000 escapeSpaces :: String -> String
1001 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
1002
1003 type Suffix = String
1004
1005 --------------------------------------------------------------
1006 -- * Search path
1007 --------------------------------------------------------------
1008
1009 -- | The function splits the given string to substrings
1010 -- using the 'searchPathSeparator'.
1011 parseSearchPath :: String -> [FilePath]
1012 parseSearchPath path = split path
1013   where
1014     split :: String -> [String]
1015     split s =
1016       case rest' of
1017         []     -> [chunk]
1018         _:rest -> chunk : split rest
1019       where
1020         chunk =
1021           case chunk' of
1022 #ifdef mingw32_HOST_OS
1023             ('\"':xs@(_:_)) | last xs == '\"' -> init xs
1024 #endif
1025             _                                 -> chunk'
1026
1027         (chunk', rest') = break isSearchPathSeparator s
1028
1029 data Direction = Forwards | Backwards
1030
1031 reslash :: Direction -> FilePath -> FilePath
1032 reslash d = f
1033     where f ('/'  : xs) = slash : f xs
1034           f ('\\' : xs) = slash : f xs
1035           f (x    : xs) = x     : f xs
1036           f ""          = ""
1037           slash = case d of
1038                   Forwards -> '/'
1039                   Backwards -> '\\'
1040 \end{code}
1041
1042 %************************************************************************
1043 %*                                                                      *
1044 \subsection[Utils-Data]{Utils for defining Data instances}
1045 %*                                                                      *
1046 %************************************************************************
1047
1048 These functions helps us to define Data instances for abstract types.
1049
1050 \begin{code}
1051 abstractConstr :: String -> Constr
1052 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1053 \end{code}
1054
1055 \begin{code}
1056 abstractDataType :: String -> DataType
1057 abstractDataType n = mkDataType n [abstractConstr n]
1058 \end{code}
1059