ghc/tests/programs/ipoole_spec_class/Main.lhs

   1 \input{LiterateUtils}
   2 {
   3 \DownLevel
   4 \author{Ian Poole}
   5 \filetitle{Specimen classification (Animation)}
   6 \maybemaketitle
   7 \noindent{\verb{!%W% %G%!}
   8 \begin{vb}
   9
  10 > module Main (main) where
  11 > import GoferPreludeBits
  12 > import Io
  13 > import Lib
  14 > import JobImp
  15 > import JobApp
  16 > import Lognum
  17
  18
  19 \end{verbatim}\end{vb}
  20
  21 Here we give the Gofer specification of the Cytoline/ILDAS
  22 specimen classifier.  This is based on the
  23 description given in appendix C of the software requirements
  24 specification.  Note however that the scheme here
  25 is modified to admit that even normal specimens contain
  26 abnormal objects.   Thus, beta distributions are allowed
  27 for both the normal and abnormal specimens.
  28
  29
  30 The purpose of the specimen classifier is to integrate all
  31 evidence from the low and high resolution passes (and in
  32 Cytoline, from interactive review), to deliver a decision
  33 for the slide:
  34 \footnote{We will consistently adopt the order Abnormal, Normal
  35  Leukocyte, Junk for object classes, which we
  36 will abbreviate to {\tt A, N, L, J}, and Abnormal, Normal for
  37 specimen classifications, which we will abbreviate as {\tt Abn, Nrm}}
  38
  39 \begin{Dec}{SpecDecision}
  40 The specimen level decision.
  41 \begin{vb}
  42
  43 > data SpecDecision = SignOut | Review
  44
  45 \end{verbatim}\end{vb}\end{Dec}
  46
  47
  48 \sectionH{Specimen level evidence}
  49
  50 \begin{Dec}{SpecEvidence}
  51 We assume that all evidence is expressed as likelihoods of observed
  52 features given each the four object classes, Normal, Abnormal, Leukocyte,
  53 Junk.  A posterior probability is assumed to exist for every object seen,
  54 based (only) on the most refined evidence available.
  55 \begin{vb}
  56
  57 > type Prob = Double
  58 > type SpecEvidence = [(Int, ObjProbVec)]
  59 > type ObjProbVec = (Prob, Prob, Prob, Prob)
  60
  61 \end{verbatim}\end{vb}\end{Dec}
  62
  63
  64 \begin{Def}{composeEvidence}
  65
  66 Extract object level evidence from the low and high resolution
  67 target structures.   The evidence those objects which were not rescaned
  68 is fabricated by replicating the mean PP within the object class,
  69 according to the count in that object class.
  70 \begin{vb}
  71
  72 > type TargetList =  ()
  73 > type TargetRecord = ()
  74
  75 > composeEvidence :: TargetList -> TargetList -> SpecEvidence
  76 > composeEvidence high low
  77 >       = extractEvidence low ++ extractEvidence high
  78 >       where
  79 >       extractEvidence ts
  80 >               = [(1, getPPs t) | t <- ts] ++
  81 >                 [(countA', meanPPA),
  82 >                  (countN, meanPPN),
  83 >                  (countL, meanPPL),
  84 >                  (countJ, meanPPJ)]
  85 >
  86 >                 where
  87 >                        countA' = countA - length ts  -- avoid double count
  88
  89 The extraction functions should come from the TargetList ADT,
  90 but we leave this as yet unresolved.
  91
  92 >                        getPPs :: TargetRecord -> ObjProbVec
  93 >                        getPPs = undefined
  94 >                        (ts,
  95 >                         countA, countN, countL, countJ,
  96 >                         meanPPA, meanPPN, meanPPL, meanPPJ)
  97 >                              = extractFromStruct ts
  98 >                        extractFromStruct ts = undefined
  99 >
 100
 101 \end{verbatim}\end{vb}\end{Def}
 102
 103 \sectionH{Specimen level model}
 104
 105 \begin{Dec}{SpecModel}
 106 It is assumed that the following model parameters are known:
 107 \begin{itemize}
 108 \item {\tt specPriorSM}      --- Prior probability for specimen classification
 109 \item {\tt objProportionsSM} --- Mean proportions of objects, by class, on normal and
 110       abnormal specimens.
 111 \item {\tt cvAbnObjSM}       --- Coefficient of variation in proportion of abnormal cells
 112       on normal and abnormal specimens.
 113 \item {\tt fnrSM}            --- Acceptable false negative rate.
 114 \end{itemize}
 115 There is facility to read parameters from a file.
 116 \begin{vb}
 117
 118 > type SpecModel = SpecModelImp
 119
 120 #ifdef Gofer
 121
 122 >       in
 123 >       specPriorSM, abnPropsSM, qLqJSM, cvAbnObjSM, betaParamsSM, fnrSM,
 124 >       calibFuncSM, readSpecModelSM, testSpecModelSM
 125
 126 #endif Gofer
 127
 128 > specPriorSM      :: SpecModel -> (Double, Double)
 129 > abnPropsSM       :: SpecModel -> (Double, Double)
 130 > qLqJSM           :: SpecModel -> (Double, Double)
 131 > cvAbnObjSM       :: SpecModel -> (Double, Double)
 132 > betaParamsSM     :: SpecModel -> (BetaParams, BetaParams)
 133 > fnrSM            :: SpecModel -> Double
 134 > calibFuncSM      :: SpecModel -> (Double -> Double)
 135 > readSpecModelSM  :: FileName  -> Job s s SpecModel
 136 > testSpecModelSM  :: SpecModel
 137 >
 138 > type FileName = String
 139 > type BetaParams = (Double, Double, Lognum)
 140
 141 > type SpecModelImp = ((Double, Double), (Double, Double), (Double, Double),
 142 >                      (Double, Double), (BetaParams, BetaParams), Double)
 143 >
 144 > readSpecModelSM = undefined
 145
 146 > specPriorSM (a,b,c,d,e,f)      = a
 147 > abnPropsSM (a,b,c,d,e,f)       = b
 148 > qLqJSM (a,b,c,d,e,f)         = c
 149 > cvAbnObjSM (a,b,c,d,e,f)       = d
 150 > betaParamsSM (a,b,c,d,e,f)     = e
 151 > fnrSM (a,b,c,d,e,f)            = f
 152 > calibFuncSM (a,b,c,d,e,f) = id
 153
 154 > calcBetaParams :: SpecModel -> (BetaParams, BetaParams)
 155 > calcBetaParams sm
 156 >       = (betaAbn, betaNrm)
 157 >         where
 158 >             betaAbn = cv qAbnA cvAbn
 159 >             betaNrm = cv qNrmA cvNrm
 160 >             (qAbnA, qNrmA) = abnPropsSM sm
 161 >             (cvAbn, cvNrm) = cvAbnObjSM sm
 162
 163 > betaAbnDist sm  = beta p
 164 >       where
 165 >       (_,p) = betaParamsSM sm
 166
 167
 168
 169 \end{verbatim}\end{vb}\end{Dec}
 170
 171
 172 \sectionH{Specimen classifier decision}
 173
 174 \begin{Def}{SpecClassifier}
 175 Make a Sign-out / Review decision on the basis of evidence
 176 from all objects seen.   The decision is a simple threshold
 177 --- is the predicted false negative rate (fnr) acceptable
 178 in the light of the posterior probability of abnormality
 179 and specimen prior?
 180
 181 \begin{vb}
 182
 183 > type SpecClass a = (a,a) -> a
 184 > classAbn = fst
 185 > classNrm = snd
 186
 187 > specClassifier :: SpecModel -> SpecEvidence -> SpecDecision
 188 > specClassifier sm evidence
 189 >
 190 >       = if (truePostProb / pAbn) < fnr then
 191 >               SignOut
 192 >         else
 193 >               Review
 194 >
 195 >       where
 196 >            truePostProb = calibFunc (pAbnGvnEv sm evidence)
 197 >            (pAbn, _) = specPriorSM sm
 198 >            calibFunc = calibFuncSM sm
 199 >            fnr       = fnrSM sm
 200
 201 \end{verbatim}\end{vb}\end{Def}
 202
 203 \sectionH{Specimen posterior probability}
 204
 205 \begin{Def}{pAbnGvnEv}
 206 Compute the posterior probability $P(S=\mbox{Abn} | {\bf x})$.
 207 \begin{vb}
 208
 209 > type LikelyHood = Prob
 210 > type LikeVec = [LikelyHood]
 211
 212 > pAbnGvnEv :: SpecModel -> SpecEvidence -> Prob
 213 > pAbnGvnEv sm evidence
 214 >
 215 >           = toDouble (
 216 >             (pEvGvnAbn * pAbnL) /
 217 >             (pEvGvnAbn * pAbnL + pEvGvnNrm * pNrmL)) -- Bayes theorem
 218
 219 >       where
 220 >          pEvGvnAbn = lklf sm betaAbn likelihoods
 221 >          pEvGvnNrm = lklf sm betaNrm likelihoods
 222
 223 Convert the evidence, which is in the  form of posterior probably vectors
 224 on objects, into likelyhood vectors on objects.
 225
 226 >          likelihoods :: SpecEvidence
 227 >          likelihoods = [(c,bayes ppvec) | (c,ppvec) <- evidence]
 228 >                     where
 229 >                     bayes (ppA,ppN,ppL,ppJ) =
 230 >                               (ppA/pA, ppN/pN, ppL/pL, ppJ/pJ)
 231
 232 Form the marginal object priors from the specimen-conditional priors.
 233
 234 >
 235 >          pA = let it = qAbnA * pAbn + qNrmA * pNrm in it
 236 >          pN = let it = 1.0 - (pA + pL + pJ) in it
 237
 238 Extract model parameters.
 239
 240 >          (pAbnL, pNrmL) = (toLognum pAbn, toLognum pNrm)
 241 >          (pAbn, pNrm)                 = specPriorSM sm
 242 >          (pL, pJ)                     = qLqJSM sm
 243 >          (betaAbn, betaNrm)           = let it = betaParamsSM sm in it
 244 >          (qAbnA, qNrmA)               = abnPropsSM sm
 245
 246 \end{verbatim}\end{vb}\end{Def}
 247
 248 \begin{Def}{lklf}
 249 Computes the likelyhood $p({\bf x} | S=s)$. The specimen class, $s$,
 250 (Normal or Abnormal) is passed by the within
 251 specimen class objectPriors ({\tt priors}) and coefficient of
 252 variation of abnormal objects {\tt v}).   ${\bf x}$, representing
 253 feature data for all observed objects, is passed as the list
 254 of likelyhood vectors for for each object.  In fact, these likelihoods
 255 are are required only to proportionality.
 256 \begin{vb}
 257
 258 > lklf :: SpecModel -> BetaParams -> SpecEvidence -> Lognum
 259 > lklf sm betaParams evi =
 260 >      integratePowN 6 (fromRational 0.000001, fromRational 0.999)
 261 >          (\ r ->
 262 >               let r' = toDouble r in
 263 >               beta betaParams r *
 264 >               product [toLognum (let y =
 265 >                                       lA * r' * (1.0 - q) +
 266 >                                       lN * (1.0 - r')*(1.0 - q) +
 267 >                                       lL * qL +
 268 >                                       lJ * qJ
 269 >                                  in
 270 >--                                     trace
 271 >--                                     ("y="++show y ++ " r=" ++ show r ++
 272 >--                                      " r'="++ show r' ++ " lA=" ++ show lA)
 273 >                                        y)`pow` c
 274 >                              | (c,(lA,lN,lL,lJ)) <- evi]
 275 >          )
 276 >       where
 277 >               (qL, qJ) = qLqJSM sm
 278 >               q = qL + qJ
 279
 280 > pow :: (Real b) => Lognum -> b -> Lognum
 281
 282 > (LN x) `pow` n = LN (x* toDouble n)
 283
 284 \end{verbatim}\end{vb}\end{Def}
 285
 286
 287
 288
 289 \sectionH{Function integration}
 290
 291 (These definitions more properly belong in a library module).
 292
 293 \begin{Def}{integrate, limitFromAbove}~~~
 294
 295 \begin{itemize}
 296 \item
 297         \verb@integrate (a,b) f@ = $ \int_a^b f (x) dx $
 298 \item
 299         \verb@limitFromAbove a f@ = $  \lim_{x \rightarrow a} f (x) $
 300 \end{itemize}
 301
 302 \begin{vb}
 303
 304 > integrate :: (Enum a, Fractional a) =>
 305 >              (a, a) -> (a -> a) -> a
 306 > integrate (a, b) f =
 307 >--       limitFromAbove zero
 308 >--              (\dx -> sum [ (f x) * dx | x <- [a, a+dx ..b]])
 309 >       simpsons (a,b) f
 310
 311 > integratePowN :: (Enum a, Fractional a, Floating a) =>
 312 >                Int -> (a, a) -> (a -> a) -> a
 313 > integratePowN n (a,b) f
 314 >       = integrate (invufunc a, invufunc b) (\u -> (f . ufunc) u * dxdu u)
 315 >                     where
 316 >                       invufunc x = x ** (fromInt 1 / fromInt n)
 317 >                       ufunc u = u ^ n
 318 >                       dxdu u = (u ^ (n-1)) * fromInt n
 319
 320 > simpsons :: (Enum a, Fractional a) =>
 321 >               (a, a) -> (a -> a) -> a
 322 > simpsons (a,b) f
 323 >       = (h/v3) *
 324 >         (f a + f b + v4 * odds + v2 * evens)
 325 >         where
 326 >           h = ((b-a)/fromInt(n-1))
 327 >           odds =  (sum) [f x | x <- (take ((n-1)`div`2) [a+h,    a+v3*h..])]
 328 >           evens = (sum) [f x | x <- (take ((n-3)`div`2) [a+v2*h, a+v4*h..])]
 329 >           n=201
 330 >           [v1,v2,v3,v4] = map fromInt [1..4]
 331 >
 332
 333 \end{verbatim}\end{vb}\end{Def}
 334
 335 \sectionH{The beta distribution}
 336
 337 A beta distribution is used to model the proportion of abnormal objects
 338 on normal and abnormal specimens.
 339
 340 \begin{Def}{beta}
 341 This is a distribution of a continuous variable in the range 0 to 1.
 342 \begin{vb}
 343
 344 > beta :: BetaParams -> Lognum -> Lognum
 345 > beta (a,b,n) x =
 346 >         f x / n
 347 >         where
 348 >         f x = (x `pow` (a-1.0)) * (((one - x) `pow` (b-1.0)))
 349 >         one = fromInt 1
 350
 351 \end{verbatim}\end{vb}\end{Def}
 352
 353 \sectionH{Testing}
 354
 355 \begin{vb}
 356
 357 > testSpecModelSM
 358 >       = sm
 359 >       where
 360 >               sm   = ((0.5, 0.5),                     -- P(S)
 361 >                      (0.005, 0.0002),                 -- (P(O=A|S=Abn), P(O=A|S=Nrm))
 362 >                      (0.2,0.2),                       -- qL, qN
 363 >                      (0.7, 0.7),                      -- cv - (Abn, Nrm)
 364 >                       calcBetaParams sm,              -- beta dist parmaeters
 365 >                       0.01)                           -- FNR
 366
 367 > showSpecModel sm
 368 >       = unlines [s "P(S)=" ++ (show . specPriorSM) sm,
 369 >                  s "(P(O=A|S=Abn), P(O=A|S=Nrm))=" ++ (show . abnPropsSM) sm,
 370 >                  s "(qL, qN)=" ++ (show . qLqJSM) sm,
 371 >                  s "cvAbnObj=" ++ (show . cvAbnObjSM) sm]
 372 >         where s = (ljustify 33 . show)
 373
 374 > nrm = (0.0, 1.0, 0.0, 0.0)
 375 > abn = (1.0, 0.0, 0.0, 0.0)
 376
 377 > runS nAbn nNrm =
 378 >          pAbnGvnEv testSpecModelSM [(nAbn, abn), (nNrm, nrm)]
 379
 380
 381 > cv :: Double -> Double -> BetaParams
 382 > cv m v = (a, b,n)
 383 >          where
 384 >               a = (1.0 - m) / v^2 - m
 385 >               b = a / m - a
 386 >               n = integratePowN 6
 387 >                       (toLognum 0.000001, toLognum 0.999)
 388 >                       (\x-> beta (a,b, toLognum 1) x)
 389
 390
 391 > main = go (
 392 >               (putLine . showSpecModel) testSpecModelSM >>
 393 >               putLine (tabulate2D 5
 394 >                     [0, 10]
 395 >                     [0, 100, 20000]
 396 >                      runS))
 397
 398 }
 399 \EndFile
 400
 401
 402   tabulate2D :: (Show{-was:Text-} a, Show{-was:Text-} b, Show{-was:Text-} c) => Int -> [a] -> [b] -> (a->b->c) -> String
 403
 404 > tabulate2D w alist blist f
 405 >       = (jshow2' " ") ++ "   " ++ concat (map jshow alist) ++ "\n\n" ++
 406 >         unlines [jshow2 b ++ " : " ++ concat [jshow (f a b) | a <- alist ]
 407 >                | b <- blist]
 408 >         where
 409 >         jshow x =  (ljustify w (take w (showFixed x))) ++ " "
 410 >         jshow2 x = (ljustify (w*2) (take w (showFixed x))) ++ " "
 411 >         jshow2' x = (ljustify (w*2) (take w (show x))) ++ " "
 412 >
 413
 414 > tabulate1D :: (Show{-was:Text-} a, Show{-was:Text-} b) => Int -> [a] -> (a->b) -> String
 415 > tabulate1D w alist f
 416 >       = unlines [jshow a ++ "   " ++ jshow (f  a)
 417 >                | a <- alist]
 418 >         where
 419 >         jshow x = (ljustify w (show x))
 420 >
 421
 422 Showfixed formats a Real in *non* exponent form.
 423 Big hack --- must be a better way !?!
 424
 425 > showFixed :: Real a => a -> String
 426 > showFixed x
 427 >       | dx >= 1.0 = show x
 428 >       | dx < 0.0000001 = show x
 429 >       | otherwise = showFixed' 0 dx
 430 >         where
 431 >         dx = toDouble x
 432 >         showFixed' n x = if x > 0.1 then
 433 >                             "0." ++ (take n (repeat '0')) ++ show (round (x*1000000.0))
 434 >                          else
 435 >                             showFixed' (n+1) (x*fromInt 10)
 436 >
 437