From 35b4af7ef98c04944208a52dd230dd1c8803eed7 Mon Sep 17 00:00:00 2001
From: adam <adam@megacz.com>
Date: Mon, 15 Mar 2004 17:55:33 -0800
Subject: [PATCH] integrated Brians comments on ivme paper

darcs-hash:20040316015533-5007d-68b9a12b0b3fb809a67dd7ee18013bb5eed345e7.gz
---
 doc/IVME.xls                |  Bin 0 -> 38912 bytes
 doc/mips2java.tex           |  274 -----------------
 doc/nestedvm.ivme04.tex     |  708 +++++++++++++++++++++++++++++++++++++++++++
 doc/performance.results.txt |   36 ---
 4 files changed, 708 insertions(+), 310 deletions(-)
 create mode 100644 doc/IVME.xls
 delete mode 100644 doc/mips2java.tex
 create mode 100644 doc/nestedvm.ivme04.tex
 delete mode 100644 doc/performance.results.txt
diff --git a/doc/IVME.xls b/doc/IVME.xls
new file mode 100644
index 0000000000000000000000000000000000000000..e3bf8c7d31c5de9683fad409cc06bc8438c8afe0
GIT binary patch
literal 38912
zcmeHQd2|%VnXjH1Nh5*KfG}XrMm9zql12xIu|NWZ790dJk{#Q@fk87!1CnMkGho59
zV>#fLIP1KX<K4}@!Hyj#FWD?P?7ZEVEOL_F=Rb1rcAe)OyBm_cT$^Mga`S>c`}<XO
z&vdtDMh8L`ppvS)>-y@eufD4Ky874C=f5@k!qcB!_(S1vTZCJ@o-7pkmUs=<^wRl+
z_$yp6{CYB(RFOj9a@t)(5_k=~-igk0!+YTK;MwN|@Lu>r_#*fj@H63y;b*~@z(WPZ
zZ1_3wrSNm%Z-So(KOcSp{LSzS;TOR#hF=1|6n+`J4!<0}4E`4QTj5v0-v+-Dz8roP
z{A&0$@N3~K;49&);QjE`@HOzY@OAL(;Mc=%fZqszJNzd2JK*c#H^Xm%Z-8%v-wMAC
zemi^<{0{hLc<MC$F3a^b)UBgtRqR6pufVyWF=k~h(9yj-3Na{;Qk3cn$Nt|ob-!5j
zHEk!Z2URxlW)TrFaa0__IfC<8-5lZ4sJA8$qbbj-k36n&=7~0OO!VL{06tELU7}YU
zmOL>gA;P%s%|Wyr^>Ys7_#qEDvf7fqWQ6jdbrI1ac?hAV#2ES6i2BX8kG6jDdM}xV
zEA(O%=jVyrD!*Rf!)%@p;rvjz#97ZvI12G2`28Zv@xL_~=+Rq3oq>*%`c-$^Bo6_N
zJZ#oxf-@In?18rBix1;^7G@Tvq5p9#mB)XQxxb{!$XUrVqp$VnYd8;ZrFyrkQRaJ(
zJeJ|8few#flx4mr<Et@uy^Q?HM+TIP;sxTKP%Iee4X7jyNnc{ELO7FL*-N>zm&#F>
z2a_(@z`wID59Z4(NjV5kaMA)Oe^zN^O&VXRoSl-jUDI{hwKuDva)@{AGF<9ejG+o6
zpzPMNTh`uki~qo?JNB2Gf&HtNdX{4#!yquDkp7l^q2L4dOl7b`U@@4PD*bjk2$R%G
zSSn@CTn=hR)(tQwU@jaWE30*rGb6aaTt@6)b%zmAR#67N4wN?@KrNN9MPQ8_D7Thd
zty>GN)@>ykV6njVuvVs8V6RWIW~s8FAzkTu7(Os@Qk9zJQ}vpKW!34D7o;&J#2=En
zoT$`cP-i@6H5vbVGbwjGr@Bn}v&EBYru^4)ye4!u&u~hfdFVSShd(>xS7nJ`n<ajA
z#(0|Ozo)_B=tr9Gnd0+c%scsc%H<Khes_}R*#r6VJZHco&!qze@;rB-P@ZoZ@XE6o
z5b`{)KQDO@3#TR?gQ-8ry8+x1v;_Kg^~QT!L$Q`nqAL=745q%0(aBqaI|Ii9ZINTK
zj*xylu5a%S#N*B3?$BeH2%hCl2$XYoq$AL+-xG+%nWZ@#>JG*q!(GQiai-^q)<9<n
z5%a|UhZ!+T><J{o$0fa%U9D}*`s6!R);*C#DE<U&1`U6oft8pqG|zl=6z_4AV~W74
zPgQms_dp);lIDdMA`sTZFU31}uQPN~7RVoJ#970<Md9;)h{7ff(j*l{Nu6nzGHj+X
zFT=HB#?Ty`&tcy3ibu6V+-ePKjW{-nr57nj++}$|-HQp-`zJ{CvbgM8iFky0*X5Kk
znAsgZbR-(;gqen`!?92(aUvS>;=U%*6NQLlMZzL|RG1PzN_3}7AO)eGcr?&)v`k);
zs$QW7LmiP|NKbTy4#mR3(1GfjjsD6b(N0+Gmg2}53-ty=F}<TJ5DRo9Lg>fZnu_Y0
z4SM9T4ram~dRw43uD7)5frK87gnJWuJbW?~*AK@cJ$hY5U7cPY?(OJ47Du<P(s3CJ
z#lyXw`k@ng-3kkPsB1@Gf|clj?rsv?9*KqY=16ZMjz%ff(f9UtpFrEwYJ~}TU#K%2
zm#oEV*6OiZ_&Pn%8`NVpx9b7Wh$lcPh`RM|sgB13v2fr}cPNgYlnRKRRC<SqeNe!N
zkq|_eH+KgT5MxU?8n2<kcaBDkdLp6+^h9SsFK<#3PXuBJRYZ;@4x|4Zf@j8IU9oCl
zQu{(LZ*Jcf>h0)K-CEh+-kiRwRBEuQX_OtNdcShVy*uK1c}rU>)KAe~T^Z^NtxAeH
z9<TTu#5z-)OD;l(9>aZ41G@PG+PUO2LubSn{4Q-<H-?9D@$<rS=~#52-2c$I<kLf1
zzYqiE)m=zDgYbEtG2HiwkO&I#^bohQ2FlleK@{&tPLCYYd21ecS=~`Rqvx%Uiu^5S
zIN(wKWBsBFH-`I@jiOWDyaf<1>k|(@zZ3j4Ho3~m2Ff?g#C0qDLGs5u17f2e;ePT%
z=ehU@hVwK1qOTF1XK{U7H;TTVev$a$E69HaDco4$=0~G(y#Qxyx@eHcGlQb<?XQRj
z-tu$LkDsVR_&j*jU8Nhm1LYN;M)(5q-f}Ma{Kj+1mvzs@hZp0z!E>?q4C3=~eTP7O
zIB!<<J<pAje9w<xXM5=&<L9|)lFzc9i=x$Yv4pbZh7Zy{rXzim*njz~I9WM_aHD|y
zQ?A}WkoV3DLi9b}_2fMZ*CM@DoUGny3HOsPm)7)Wq+PhUR5YGTKG&M}_(9CG!i%^w
z<WDQrA&ciAPlQhn3eDd-fH3^W&XRt%_K*gAsy&=f5q2B&J?E0oLWa+^l0M=K5cWy>
zRloRxs9z1<o)x6;D!uPl=-1N->ule-<i~Zin;S_`7~#WpN{*(yaK_E2t|R@I@b6$4
z<)f_6HS>GybJllIwr}m*geia79z*`**Py47XRb&rkFuc6^3VC$C$c?#r|$*5LB(&^
zS)`+Xo<0B0$DVo$*Met+hj{+Vn_l`mp1C5@<vkwVi4zwnOm93d)^KKBlvlp1w5!~6
zaUFCTVb#Afoq42_b+diXwHozNw`BWM{7HVEll|!`J^W#_y`*RA8&_YWy;9$BeLMUn
zNekmd6JBmQqQ5Z>82cAK+y2E27Zp=E?sYtb{JSw$iqR+yGl&a=zC>QnW;3A<UUB)8
z7yqN>P;0{h8NXV_uaU=h$e4bqM2hAL7)z*MdlV@zc~}<*=P8-{v^+k|`w>m^(7w>5
zdZ>~@T%f+dU1&(2yBm3n#KY2#K$MR>2@k^_{JmJjFlusfzg6upsr^{5iygviR3KVE
zAhg+naqJEicjki(Fplk0aiF9Xh}Yz9uG+s9;>&U`R+GEA{f*qyjmqOed2DU;82R&M
z4(6xbQXoSEMozEH!@5f(2X64oR+orpcD(qz*Iv2YpkiEzd13hr4f)z@MvNOV$DUf?
zxo4Ic<3Y@#s{g(<|0S)FoMJx;F{G_%u;z=W-}=a$Q`;L=jMqYgCD>jmM_GdXT_XOv
z`)7ao<i)?pT50Q*2wCYr{;A>ZE|wr4+P#M)MM@l@`?frK_@|c|6h-ntx~$P9Ti<-*
zr$0ISsoNS=dGf&WucH3<gV}P?F5V@6CBDLPLu(NO4IAPlL~Zn-BT9sOXfEPQglDK6
zahfO`Qhkbm#X&!0IjTV(;+g*l&hMS6;@ya6hn|8(>Br?G&<Kxa+Le2t0;jYV9M5S!
z@GvN^lNeuK(SQcAO+IOf;sRPSL#%|nd!Q405uQE7*axtCe-nB=54_Jl$e73T27)}(
zsog`}Hu|!ThuLCIdOYMkt|+9(gD9z$ebMDB5;Jf6`J9)LVSz}8H0=19p#_lL!c^#G
z$ZrvNWP3Cpk1n58n!nM$le{Eq5p@QFBo9F!djmw^YkU-<OQsk2%q1_0XJ0L%2z1O%
zU6$K}xH`~rBOk*Yr=&vKcVYZW7WxZ#*U#u;A1o9o3`}wwb$9tn1@4!G=%2yo5vDL0
z?Mg=@T~fXaJi18*E+5zC-ZJYtAX`aJcBHOG!4oSfCY$JL@MPX5IQZfaVbey<7aO8v
zkcqD*pPMRrD;RZ&6+RtpRjv8~B>S(XbV1;=5U$@KwAVKmxyi5H-3BTPz!|#F5DkSu
z+U^l)lQ@N!i}uaKXg7nlCbs%G@qHe3uErRigKm8Q#;BFrH<12uF#Knx!7q^|?A)so
zI=EMTuZcrOE;hz!kob*Q>RXDoo1Jf`aSNn`M_p(KYcR{C^N^=yzSZf75w4bLBiFWE
z%D_1|$b+>%gP5V*SgSq(33<WTqu?}(+O1MDnTg>7#RdCwkxk~s=y|3&?{Qc9^5yiY
z+HS4)PUNqJ$#M$kH=*uFU64@p{*3jq&RY0P^{TPjh8x}9V4T${^<y*F!K-<~Ey+?8
z{r&wQL^of~6N7_;GQ=*KhZ&UR=E}y%Xxo7znU5NT_<Q&=%y_0N5jUYsL9r0OKyn{g
z_!bDdwPmU1<bf|U-7IAlN2X!Le3`nzEbkmp<lXdE+S}2^Z78%5=ZB0g&PMyANZ@`2
zMZIv;T}<hv>o#p4&d_Rd4S||V@Tke7oLjKg#)hqpTxTa{ZOj*6)u<fBP}QYaBZsez
z*gQLkXsnG*A)e_bbZvZO_}aMGw*D<ig}62@O@+8NE_*}PMzxG6y&JYR=8s}+EP$Dk
zVGUBz<69eZ@Z^H&GrF~LOngo9+PK&@&^UxAZf#sLjJ9=cd@t0FYhx|fzv;CRlDAHr
zZR1=Um)T@Kd26Gy-s!bb&gtfwXr8TIyame7wQ&WMf26f>2?mR~HZFB^@e0sbF7Djb
zt~W(O@!pjQ-Pps|cVkO91Ox^jt4ISS^J1yY79KM%mRnqN#EY~&L?aJ|Tf&=Cunvd2
zQT!>~ms-WL@2`+f%~m1ay=^l0S6aj?OG-t3Ltjs~ejLvj!jaz1W!07bG94Szksuxy
zY%aUIy}4pzS;Lm1`sGb~x3}Ndx<ik4W8YtIy?fj4UEA@fwyJ7tG>W}-%J&$a2UJxx
zwKwUlyLYv<>mXKDwPR12Ue=XJL^oAcJ^0{*l>uh2?1=O*W4x+07Kw&pi4(g)wE_hy
zgNa}n>QmL&_(E(j+>zK)R9JsBbYcrg7{sD6JOQcXLxtu@tS5lWmYtDU_#~cU1iJTy
z;`LR`k77pg%;gi|j-#POB_5Ro@aV!UQ&=BQfPAO3Iz;sV&C(e{P9sNAVNSx~M5t$L
zEEYJCCLLf9jG>^(S{523H;zQWdiR!EzrVgpUXgH4mO8q_q25G_A7ma&V{_B)i3FPh
ziIA0e5aBH~et%tszqX>fy4}Bi)4G~X)z$acSIMLi`Hd$)0ndgytyIzXA#}qQwA#Fs
z60I^tT3=<#8TE`-f%e8I8a|flH7iT?QAYK<cGM?B)2Jt=@T%={oK~R1aVbz+N1z*%
z#Py~;j%KB19caz9GUwE7*tG8UsW|5VA&FV!x?Hj{G%ck%(+id|0nEi|eDjSdVaEWy
zUKXt^%}XiK^!!6nj9G=^#}2hCyEhtu9eyC(dpL4HnKGkWxon2%*5Hw200jjZxi8d_
zwk%UxnA@6gnBHSFM{}8F!;SC4debVT;7nVQ*CXCsWNju(Tc{!=*F=A%%_1Kmy4JsL
zV`Z(yt+OtzX5pGTyP0p9G{8mXMwlk5)9N3ioO4qjZudjFqcQw*@|?-?Pn)a`)88@<
ztYWc@jxlsGM#T-G%FcFX(dCK_8PzdMbZt$|I-sl5E@R=EItS6T!9A_ZX{{ZTE~7hl
zVJ^p3NGm7yJ%OI|t$2F2%Q)AW)5Iue<H~4HWosm!XzPfDqnPs3yCFG8^p(%Zxtd7r
zH5lm)RCI?D*qXAjmU|K#wlR{T=G=A4-uT=#<eDaHEBqTO{MGH%8#dM5zNvP-?Or9P
zP;*a{!|v=;=E68t>+asb<gEBpK*w^lHOo{vwq<`qZC$2ye%*%kw&-hYY8-yAeFgeE
zLCbu4-mvy5Vz1&lKgAT-+1M-^Tv5HzrhJAyn<aX~hRhm#yFGeZgXMO$+U&lv88{jZ
z-UthPwq{xu`McZn-P4OaAA?NDB5#<0TU&Qw--S;G&CP(69pv{{)?hwP8+*A-Nj#uC
zj$1&yKy)-s=y0Gr9;)KjSFWOb1uI=9^W!Nz`ZMPAcDa{fVJl@l=c;3PGfmsk*~(m0
zGBmP2)y7-}txYs5cAA;#Z5C$}%@)>I$w#JJidw`;LK^WTU0bWI<+JD+zbibLd?ES5
z%nufR%H5e)?!GKxc{{~Urn<i4DrW({#e=ikjgmrpB#CSFY#c$lF0Z&*8<3Z}JZ3yd
z)ILZlaHdOKPG08gm2hd4vL+O2-$bU#=jZ^-DzrWG0uzk7D|>`?{s|M6ui|v@cJ^B)
zDqqF*1HG@xl7@<lAfHE~_kkKQ;&7q4E$DqVRAKpvaA#vxi0F3mq(Vft7Z@St0e&1u
zg`8)(AMAb4`+DbiB=ViVuuvixJ~e3K@mc=6ANa_bPb7Vf3JXsf<Vs=TN#mPu>n;7i
zb*cpMRV8BIPyX#c>+kxLfww1(duCO9?ju!-2018^Z&Q#WX_ZK{!K-+gvk^?T;<c70
z{LM-s=>G+IbOS+8SOWm;DB~V_MwT5Vf*xg5U3LULX-y1DkUdg}-!buu-?|se-<xqF
z=(Q9|@H?HzEZu~n1T6^qca^<gBIvym38+HEbr3<%5K)4&QX!%QOLCwDzoMN_{y`#0
zClW#LMy-77mYPX)stqNmMsTr@FTWI7-d(kU)zwHP6&p~H1feD}dh9^K5!j*z7W)9u
zg)~!d(K@(NIKwe;rCLiNj0r;jUx3el*#V(v({o{ciN__d5{j?nB$2+uVS?kOT{w)k
z6()EAb#BB^`4Aw_i{Y)D{s^3Z7UF$B&}MIfzwsdSmoP<LhH7;}3dN9AE(m=Sqy>GE
z`05gy%uN_8(M@<JOB_gc^!!~Q@ibP9Cvm<H9Wz?=ytB$Fgb@5HUgEhQ%icdjimyV7
zPV~H)juuO2Me&VC>z!lCL<jP0rTs9LzIN338HoFo(Ys{8ItC;|ID8r2L<r7u^zI^=
zmf^<~_@+2}8nod|HcV563)FBBJr{DjQ!$E$N>K)bHJ6HU=7GE|j(G^vPdnlf6<}Xt
z5M@GRIvGFpf@NJ<RDAOfS9m|qMAB!+AQG$iVi<4YADzY^PGb-=-mawYL9ziXc|$Ir
zcwTFe4u7p}k3D1ajeFuuTkI+9oMVU5yf=ebrZI>%^|{_Kh*ML6u2U?Kqgkn0=NiHQ
z0rxyLRmQ|wU5Bvw3{6X^&h&ycjX@j{6l5yU^!&qdW4<qN#Wv4022ni=oW>xI`aLV^
zU#8(|*yy>yHTIW}9E)&Hs+lbRtiu2nf2u#z#=j*tBmXuVISn&)KCYiw09WQ$*>Z~F
z%E4QfclENjlii#FPn!j7m?ip#!5~gKT{a%d-UtwdOfXAo?=%Kcu5IaQazfw<sptJr
zEh~oWs=jqF#UNacwq}_sr!k1~9pfoHHCpxe>aO!sOo5$^&7#3sz=m0(uTKo(@ZF-E
z*4d;Os^;RJ4V*WzHtHL$yV;s)S>&fNh~}2Sge>yLz1(ncUdpO+qVsYNiVX0)!ZbcF
zXHhFaFID+)gq|@XoB+M_JzvIKzO2nOZAWJ-b5Y6A$aEWX6|^?dtax|}&6_r76U}?w
zYZ!y5ezk|VI${gya$^vOwqFSbk!AUeUez%G{NhnrFM)I_?tM%6)L$VGB~lM5Dl|5%
zA|WLv(4a)b@A%(;STpFdU=@qs681jaaLMv(!u!r1)m>{!Em%eVo~sE&m^iE=TSFO?
z5IQK^#@3hcyA9%1;OI`H{e3AM<17c3(Qm~vg28N9#z7tAP}n@DehnC9GGZCoSTJ<u
zzXm)OEMxZ()GFQ8#xfS!@QO21Az~S4rb5Ir7EdiKqe8)$uq?}$DQvJo=Hj0NHmE=~
zR|U*Sm?FW93S*i6Muvl}jA+JOY)vwnvB-vxBJ646(2O&N(awZsd={fF8=CQHOk>k%
z#<8Foi)}JDhs{+*GdioBLTJXXV^$}c@hnDMCN!g&ZcM^9#NpLMGj7EqK{R7I%#V@K
zjQlBU6U{i&(Yq@^W4Yy<JoxHPdL~ML1nQc{HS$|_gj|>5ub_Z-!$`la$W?e_0^hDU
zHkr6a`?o9d$u${=OTPsZ3ftd;QP9%i7HH91(O_vE;$&EdxG>_lgeZ%Vad{Stkqgpz
zEJl3j!(uU7-%L?&w^-jy;fiX1Gll%{EeNZ{_|~5d-QTwQkCtyo{Ldfv%=3QwqUG(D
zzr6a1&EG$v$?rd0M(J{C`I+B;;M*&WR*MhoQ#OkaS#2(5@u5sci;w31q+5LWM1<k4
zw#DbOS#`5hAzFNMQXyJ=rBln|Q!lj?`*>@nK3W&F8C6WrVC$)IbX9CUw_)q4+cbWp
z%)wAbD=!yAlWgVrZ0p9vT6wdF(avP${SB;wY*yZTuu9*UR$c@*!sB^orPL<#NwM;r
zRZbx*?~B+vpq2ME%xsygJTu*#d9Cl2Tum$QS<C>m@&cGQMzZpLjeMq+H^<St%aFE6
zempTKZ92?9_$BL}NbH2ZwWA{x&+&WKY7XM!Z2diJZtB>bilL!lI<<LZhIm#n&OD@@
zcFZ%5_f`sM*1#9wPt!(Y-*E?N!P~J@^vD}&-+^mG9%=io#CXfu{5DxniTnZ<K8d1)
zPR?n&Id>e#X76z)kyeee0h{-dkMUT2{GgH9u_dDG$zMOzu<n};{8pK=`ZQ75$UGMs
z)Em^w&cg@cEIWf^vGd5Ivc_nBvF1F~R3c7qx_R}`$@z^chHpZ@?L((d7AKY%F|;?8
ztvDOD;!Lp-?X4H9#d^H$Y_%3!t=8h)aafBelhIm4{KQy`F0t9A-gWl7MtaxzZ}B?2
za#!10oNKcPZ%Tz|EzV1YXf4j4TGpcah*+`w3{bxDWm%hbroLJ>Y-^AMkfWD7h7<gr
zv&S$OxucCQp_w1Eb#Rr@X3WLaB-@O0ZL844+Ke|1qn*iSd^yL?;J@FnJA*UFX*15Z
z$$V05MrV~%$YvZeb_Soof|ki<G}GlQfx~3kjQxhqcxDtf<L|+jX*14q^sbJy#nunJ
zo3oI<U47L&c+GDLF2Ur=8GH%;3JXVLG}0Pwd6y91hkxTfXbY~%I9zt4@x2d9qY*2K
zG8&Z~Pg~P`OpUKhBFIO`pMI2aOjB`ZQa_7BV@<^&75~aWq)|j`h9Q>1XY-7ZGVbOf
z#LuxF#<`r&P+?%TBA07uCt?m729~T3AJXP_oFDO{{v0dH(7?*^7Q5M4{=SJcx^SF|
zjv}DQUdOM9aN%aNXx($m7sFS^Q!SZ3?XEuwypCVEau-nx?2a)P9{cETFYWE}ed<ZC
zShMo;-{lZ_0-pN8bbfxEI@1VGUD*xK-SdO+Sek%<ke{d*r*Wo^`Bw7FgsRLoeXG1?
zzOBJP4>kij105%29Pg@H!X2?lJaRap@966Yb;~+L)wg%7DEJzF2P^Gz@$dTfH@^Mh
z%0<4X-Ut5IUiu9AF9+jl%H=OjamWjCFFe<vAUv1%Zg}#LfahJ}L-3qX*bdsmPr#G!
zJkYYXi!u>$$P*_Ib^u6=F57u2Dxoa-)WZ3>Q+(xCSbp`0dnTzL8<KNfre|c58sEc-
z%XaO-Prn=wv1rS#);5{ACz3$oOjJz9*%@T+&5_vAcvmQth)dqd(`cQBnSq-n92vTo
zM1>fXN71|xXGcHksjB3RvaxyBHZBgbk1N~ALM63o+D(=OG_hSo@IWVuFV-FsN5)(&
dzq)Om6-PsW<Bwx*qTE>PpQw6GI@faj{|_I61ML6+

literal 0
HcmV?d00001

diff --git a/doc/mips2java.tex b/doc/mips2java.tex
deleted file mode 100644
index 2c271df..0000000
--- a/doc/mips2java.tex
+++ /dev/null
@@ -1,274 +0,0 @@
-%
-% FIXME: - Add something about size limits on the constant pool
-%          how we worked around that and the performance impact
-%          of -o lessconstants
-%        - Add something about encoding data sections as string constants
-%          and the UTF8 non-7-bit-ascii penalty 
-%
-
-\documentclass{acmconf}
-\usepackage{graphicx}
-\usepackage{amssymb,amsmath,epsfig,alltt}
-\sloppy         % better line breaks
-\usepackage{palatino}
-\usepackage{parskip}
-\usepackage{tabularx}
-\usepackage{alltt}
-\bibliographystyle{alpha}
-
-\title{\textbf{\textsf{
-Running Legacy C/C++ Libraries in a Pure Java Environment
-}}}
-\date{}
-\author{\begin{tabular}{@{}c@{}}
-        {\em {Brian Alliet}} \\ \\
-        {{\it Affiliation Goes Here}}\relax
-   \end{tabular}\hskip 1in\begin{tabular}{@{}c@{}}
-        {\em {Adam Megacz}} \\ \\
-        {UC Berkeley}\relax
-\end{tabular}}
-\begin{document}
-
-\maketitle
-
-\begin{abstract}
-Abstract
-\end{abstract}
-
-\section{Introduction}
-
-\subsection{Why would you want to do this?}
-
-The C programming language has been around for over 30 years now.
-There is a huge library of software written in this language.  By
-contrast, Java has been around for less than ten years.  Although it
-offers substantial advantages over C, the set of libraries written in
-this language still lags behind C/C++.
-
-The typical solution to this dilemma is to use JNI or CNI to invoke C
-code from within a Java VM.  Unfortunately, there are a number of
-situations in which this is not an acceptable solution:
-
-\begin{itemize}
-\item Java Applets are not permitted to invoke {\tt Runtime.loadLibrary()}
-\item Java Servlet containers with a {\tt SecurityManager} will not
-      permit loading new JNI libraries.  This configuration is popular
-      with {\it shared hosting} providers and corporate intranets
-      where a number of different parties contribute individual web
-      applications which are run together in a single container.
-\item JNI requires the native library to be compiled ahead of time,
-      separately, for every architecture on which it will be deployed.
-      This is unworkable for situations in which the full set of
-      target architectures is not known at deployment time.
-\item The increasingly popular J2ME platform does not support JNI or CNI.
-\item Unlike Java Bytecode, JNI code is susceptible to buffer overflow
-      and heap corruption attacks.  This can be a major security
-      vulnerability.
-\item JNI often introduces undesirable added complexity to an
-      application.
-\end{itemize}
-
-The technique we present here is based on using a typical ANSI C
-compiler to compile C/C++ code into a MIPS binary, and then using a
-tool to translate that binary on an instruction-by-instruction basis
-into Java bytecode.
-
-The technique presented here is general; we anticipate that it can be
-applied to other secure virtual machines such as Microsoft's .NET.
-
-
-\section{Existing Work: Source-to-Source Translation}
-
-\begin{itemize}
-\item c2java
-\item commercial products
-\end{itemize}
-
-\section{Mips2Java: Binary-to-Binary Translation}
-
-We present Mips2Java, a binary-to-binary translation tool to convert
-MIPS binaries into Java bytecode files.
-
-The process of utilizing Mips2Java begins by using any compiler for
-any language to compile the source library into a statically linked
-MIPS binary.  We used {\tt gcc 3.3.3}, but any compiler which can
-target the MIPS platform should be acceptable.  The binary is
-statically linked with a system library (in the case of C code this is
-{\tt libc}) which translates system requests (such as {\tt open()},
-{\tt read()}, or {\tt write()}) into appropriate invocations of the
-MIPS {\tt SYSCALL} instruction.
-
-The statically linked MIPS binary is then fed to the Mips2Java tool
-(which is itself written in Java), which emits a sequence of Java
-Bytecodes in the form of a {\tt .class} file equivalent to the
-provided binary.  This {\tt .class} file contains a single class which
-implements the {\tt Runtime} interface.  This class may then be
-instantiated; invoking the {\tt execute()} method is equivalent to
-invoking the {\tt main()} function in the original binary.
-
-
-\subsection{Why MIPS?}
-
-We chose MIPS as a source format for two primary reasons: the
-availability of tools to translate legacy code into MIPS binaries, and
-the close similarity between the MIPS ISA and the Java Virtual Machine.
-
-The MIPS architecture has been around for quite some time, and is well
-supported by the GNU Compiler Collection, which is capable of
-compiling C, C++, Java, Fortran, Pascal (with p2c), and Objective C
-into MIPS binaries.
-
-The MIPS R1000 ISA bears a striking similarity to the Java Virtual
-Machine.  This early revision of the MIPS supports only 32-bit aligned
-loads and stores from memory, which is precisely the access model
-supported by Java for {\tt int[]}s.
-
-Cover dynamic page allocation.
-
-Cover stack setup.
-
-Brian, are there any other fortunate similarities we should mention
-here?  I seem to remember there being a bunch, but I can't recall them
-right now; it's been a while since I dealt with this stuff in detail.
-
-
-\subsection{Interpreter}
-
-\begin{itemize}
-\item slow, but easy to write
-\end{itemize}
-
-
-\subsection{Compiling to Java Source}
-\begin{itemize}
-\item performance boost
-\item pushes the limits of {\tt javac} and {\tt jikes}
-\end{itemize}
-
-\subsection{Compiling directly to Java Bytecode}
-\begin{itemize}
-\item further performance boost (quantify)
-\item brian, can you add any comments here?
-\end{itemize}
-
-\subsection{Interfacing with Java Code}
-
-Java source code can create a copy of the translated binary by
-instantiating the corresponding class, which extends {\tt Runtime}.
-Invoking the {\tt main()} method on this class is equivalent to
-calling the {\tt main()} function within the binary; the {\tt String}
-arguments to this function are copied into the binary's memory space
-and made available as {\tt argv**} and {\tt argc}.
-
-The translated binary communicates with the rest of the VM by
-executing MIPS {\tt SYSCALL} instructions, which are translated into
-invocations of the {\tt syscall()} method.  This calls back to the
-native Java world, which can manipulate the binary's environment by
-reading and writing to its memory space, checking its exit status,
-pausing the VM, and restarting the VM.
-
-
-\subsection{Virtualization}
-
-The {\tt Runtime} class implements the majority of the standard {\tt
-libc} syscalls, providing a complete interface to the filesystem,
-network socket library, time of day, (Brian: what else goes here?).
-
-\begin{itemize}
-\item ability to provide the same interface to CNI code and mips2javaified code
-\item security advantages (chroot the {\tt fork()}ed process)
-\end{itemize}
-
-\section{Related Work}
-
-\subsection{Source-to-Source translators}
-
-A number of commercial products and research projects attempt to
-translate C++ code to Java code, preserving the mapping of C++ classes
-to Java classes.  Unfortunately this is problematic since there is no
-way to do pointer arithmetic except within arrays, and even in that
-case, arithmetic cannot be done in terms of fractional objects.
-
-Mention gcc backend
-
-c2java
-
-Many of these products advise the user to tweak the code which results
-from the translation.  Unfortunately, hand-modifying machine-generated
-code is generally a bad idea, since this modification cannot be
-automated.  This means that every time the origin code changes, the
-code generator must be re-run, and the hand modifications must be
-performed yet again.  This is an error-prone process.
-
-Furthermore, Mips2Java does not attempt to read C code directly.  This
-frees it from the complex task of faithfully implementing the ANSI C
-standard (or, in the case of non ANSI-C compliant code, some other
-interface).  This also saves the user the chore of altering their
-build process to accomodate Mips2Java.
-
-
-\section{Performance}
-
-\subsection{Charts}
-
-(Note that none of these libraries have pure-Java equivalents.)
-
-\begin{itemize}
-\item libjpeg
-\item libfreetype
-\item libmspack
-\end{itemize}
-
-
-\subsection{Optimizations}
-
-Brian, can you write something to go here?  Just mention which
-optimizations helped and which ones hurt.
-
-\begin{itemize}
-\item trampoline
-\item optimal method size
-\item -msingle-float
-\item -mmemcpy
-\item fastmem
-\item local vars for registers (useless)
-\item -fno-rename-registers
-\item -ffast-math
-\item -fno-trapping-math
-\item -fsingle-precision-constant
-\item -mfused-madd
-\item -freg-struct-return
-\item -freduce-all-givs
-\item -fno-peephole
-\item -fno-peephole2
-\item -fmove-all-movables
-\item -fno-sched-spec-load
-\item -fno-sched-spec
-\item -fno-schedule-insns
-\item -fno-schedule-insns2
-\item -fno-delayed-branch
-\item -fno-function-cse
-\item -ffunction-sections
-\item -fdata-sections
-\item array bounds checking
-\item -falign-functions=n
-\item -falign-labels=n
-\item -falign-loops=n
-\item -falign-jumps=n
-\item -fno-function-cse
-\end{itemize}
-
-\section{Future Directions}
-
-World domination.
-
-\section{Conclusion}
-
-We rock the hizzouse.
-
-\section{References}
-
-Yer mom.
-
-\end{document}
-
diff --git a/doc/nestedvm.ivme04.tex b/doc/nestedvm.ivme04.tex
new file mode 100644
index 0000000..b4c96a1
--- /dev/null
+++ b/doc/nestedvm.ivme04.tex
@@ -0,0 +1,708 @@
+%
+% FIXME: - Add something about size limits on the constant pool
+%          how we worked around that and the performance impact
+%          of -o lessconstants
+%        - Add something about encoding data sections as string constants
+%          and the UTF8 non-7-bit-ascii penalty 
+%
+
+\documentclass{acmconf}
+\usepackage{graphicx}
+\usepackage{amssymb,amsmath,epsfig,alltt}
+\sloppy         % better line breaks
+\usepackage{palatino}
+\usepackage{parskip}
+\usepackage{tabularx}
+\usepackage{alltt}
+\bibliographystyle{alpha}
+
+\title{\textbf{\textsf{
+Running Legacy C/C++ Libraries in a Pure Java Environment
+}}}
+\date{}
+\author{\begin{tabular}{@{}c@{}}
+        {\em {Brian Alliet}} \\ \\
+        {{\it Affiliation Goes Here}}\relax
+   \end{tabular}\hskip 1in\begin{tabular}{@{}c@{}}
+        {\em {Adam Megacz}} \\ \\
+        {UC Berkeley}\relax
+\end{tabular}}
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+Abstract
+\end{abstract}
+
+\section{Introduction}
+
+\subsection{Why would you want to do this?}
+
+The C programming language has been around for over 30 years now.
+There is a huge library of software written in this language.  By
+contrast, Java has been around for less than ten years.  Although it
+offers substantial advantages over C, the set of libraries written in
+this language still lags behind C/C++.
+
+The typical solution to this dilemma is to use JNI or CNI to invoke C
+code from within a Java VM.  Unfortunately, there are a number of
+situations in which this is not an acceptable solution:
+
+\begin{itemize}
+\item Java Applets are not permitted to invoke {\tt Runtime.loadLibrary()}
+\item Java Servlet containers with a {\tt SecurityManager} will not
+      permit loading new JNI libraries.  This configuration is popular
+      with {\it shared hosting} providers and corporate intranets
+      where a number of different parties contribute individual web
+      applications which are run together in a single container.
+\item JNI requires the native library to be compiled ahead of time,
+      separately, for every architecture on which it will be deployed.
+      This is unworkable for situations in which the full set of
+      target architectures is not known at deployment time.
+\item The increasingly popular J2ME platform does not support JNI or CNI.
+\item Unlike Java Bytecode, JNI code is susceptible to buffer overflow
+      and heap corruption attacks.  This can be a major security
+      vulnerability.
+\item JNI often introduces undesirable added complexity to an
+      application.
+\end{itemize}
+
+The technique we present here is based on using a typical ANSI C
+compiler to compile C/C++ code into a MIPS binary, and then using a
+tool to translate that binary on an instruction-by-instruction basis
+into Java bytecode.
+
+The technique presented here is general; we anticipate that it can be
+applied to other secure virtual machines such as Microsoft's .NET.
+
+
+\section{Existing Work: Source-to-Source Translation}
+
+\begin{itemize}
+\item c2java
+\item commercial products
+\end{itemize}
+
+\section{Mips2Java: Binary-to-Binary Translation}
+
+We present Mips2Java, a binary-to-binary translation tool to convert
+MIPS binaries into Java bytecode files.
+
+The process of utilizing Mips2Java begins by using any compiler for
+any language to compile the source library into a statically linked
+MIPS binary.  We used {\tt gcc 3.3.3}, but any compiler which can
+target the MIPS platform should be acceptable.  The binary is
+statically linked with a system library (in the case of C code this is
+{\tt libc}) which translates system requests (such as {\tt open()},
+{\tt read()}, or {\tt write()}) into appropriate invocations of the
+MIPS {\tt SYSCALL} instruction.
+
+The statically linked MIPS binary is then fed to the Mips2Java tool
+(which is itself written in Java), which emits a sequence of Java
+Bytecodes in the form of a {\tt .class} file equivalent to the
+provided binary.  This {\tt .class} file contains a single class which
+implements the {\tt Runtime} interface.  This class may then be
+instantiated; invoking the {\tt execute()} method is equivalent to
+invoking the {\tt main()} function in the original binary.
+
+
+\subsection{Why MIPS?}
+
+We chose MIPS as a source format for two primary reasons: the
+availability of tools to translate legacy code into MIPS binaries, and
+the close similarity between the MIPS ISA and the Java Virtual Machine.
+
+The MIPS architecture has been around for quite some time, and is well
+supported by the GNU Compiler Collection, which is capable of
+compiling C, C++, Java, Fortran, Pascal (with p2c), and Objective C
+into MIPS binaries.
+
+The MIPS R1000 ISA bears a striking similarity to the Java Virtual
+Machine.  This early revision of the MIPS supports only 32-bit aligned
+loads and stores from memory, which is precisely the access model
+supported by Java for {\tt int[]}s.
+
+Cover dynamic page allocation.
+
+Cover stack setup.
+
+Brian, are there any other fortunate similarities we should mention
+here?  I seem to remember there being a bunch, but I can't recall them
+right now; it's been a while since I dealt with this stuff in detail.
+
+
+\subsection{Interpreter}
+
+\begin{itemize}
+\item slow, but easy to write
+\end{itemize}
+
+
+\subsection{Compiling to Java Source}
+\begin{itemize}
+\item performance boost
+\item pushes the limits of {\tt javac} and {\tt jikes}
+\end{itemize}
+
+\subsection{Compiling directly to Java Bytecode}
+\begin{itemize}
+\item further performance boost (quantify)
+\item brian, can you add any comments here?
+\end{itemize}
+
+\subsection{Interfacing with Java Code}
+
+Java source code can create a copy of the translated binary by
+instantiating the corresponding class, which extends {\tt Runtime}.
+Invoking the {\tt main()} method on this class is equivalent to
+calling the {\tt main()} function within the binary; the {\tt String}
+arguments to this function are copied into the binary's memory space
+and made available as {\tt argv**} and {\tt argc}.
+
+The translated binary communicates with the rest of the VM by
+executing MIPS {\tt SYSCALL} instructions, which are translated into
+invocations of the {\tt syscall()} method.  This calls back to the
+native Java world, which can manipulate the binary's environment by
+reading and writing to its memory space, checking its exit status,
+pausing the VM, and restarting the VM.
+
+
+\subsection{Virtualization}
+
+The {\tt Runtime} class implements the majority of the standard {\tt
+libc} syscalls, providing a complete interface to the filesystem,
+network socket library, time of day, (Brian: what else goes here?).
+
+\begin{itemize}
+\item ability to provide the same interface to CNI code and mips2javaified code
+\item security advantages (chroot the {\tt fork()}ed process)
+\end{itemize}
+
+\section{Related Work}
+
+\subsection{Source-to-Source translators}
+
+A number of commercial products and research projects attempt to
+translate C++ code to Java code, preserving the mapping of C++ classes
+to Java classes.  Unfortunately this is problematic since there is no
+way to do pointer arithmetic except within arrays, and even in that
+case, arithmetic cannot be done in terms of fractional objects.
+
+Mention gcc backend
+
+c2java
+
+Many of these products advise the user to tweak the code which results
+from the translation.  Unfortunately, hand-modifying machine-generated
+code is generally a bad idea, since this modification cannot be
+automated.  This means that every time the origin code changes, the
+code generator must be re-run, and the hand modifications must be
+performed yet again.  This is an error-prone process.
+
+Furthermore, Mips2Java does not attempt to read C code directly.  This
+frees it from the complex task of faithfully implementing the ANSI C
+standard (or, in the case of non ANSI-C compliant code, some other
+interface).  This also saves the user the chore of altering their
+build process to accomodate Mips2Java.
+
+
+\section{Performance}
+
+\subsection{Charts}
+
+(Note that none of these libraries have pure-Java equivalents.)
+
+\begin{itemize}
+\item libjpeg
+\item libfreetype
+\item libmspack
+\end{itemize}
+
+
+\subsection{Optimizations}
+
+Brian, can you write something to go here?  Just mention which
+optimizations helped and which ones hurt.
+
+\begin{itemize}
+\item trampoline
+\item optimal method size
+\item -msingle-float
+\item -mmemcpy
+\item fastmem
+\item local vars for registers (useless)
+\item -fno-rename-registers
+\item -ffast-math
+\item -fno-trapping-math
+\item -fsingle-precision-constant
+\item -mfused-madd
+\item -freg-struct-return
+\item -freduce-all-givs
+\item -fno-peephole
+\item -fno-peephole2
+\item -fmove-all-movables
+\item -fno-sched-spec-load
+\item -fno-sched-spec
+\item -fno-schedule-insns
+\item -fno-schedule-insns2
+\item -fno-delayed-branch
+\item -fno-function-cse
+\item -ffunction-sections
+\item -fdata-sections
+\item array bounds checking
+\item -falign-functions=n
+\item -falign-labels=n
+\item -falign-loops=n
+\item -falign-jumps=n
+\item -fno-function-cse
+\end{itemize}
+
+\section{Future Directions}
+
+World domination.
+
+\section{Conclusion}
+
+We rock the hizzouse.
+
+\section{References}
+
+Yer mom.
+
+\section{stuff}
+\begin{verbatim}
+
+libjpeg (render thebride_1280.jpg)
+Native -  0.235s
+JavaSource - 1.86s
+ClassFile - 1.37s
+
+freetype (rendering characters 32-127 of Comic.TTF at sizes from 8 to
+48 incrementing by 4)
+Native - 0.201s
+JavaSource - 2.02s
+ClassFile - 1.46s
+
+Section 3.2  - Interpreter
+The Interpreter was the first part of Mips2Java to be written.  This was the most 
+straightforward and simple way to run MIPS binaries inside the JVM.  The simplicity of the 
+interpreter also made it very simple to debug. Debugging machine-generated code is a pain. 
+Most importantly, the interpreter provided a great reference to use when developing the 
+compiler. With known working implementations of each MIPS instruction in Java writing a 
+compiler became a matter of simple doing the same thing ahead of time.
+With the completion of the compiler the interpreter in Mips2Java has become less useful. 
+However, it may still have some life left in it. One possible use is remote debugging with 
+GDB. Although debugging the compiler generated JVM code is theoretically possible, it 
+would be far easier to do in the interpreter. The interpreter may also be useful in cases 
+where size is far more important than speed. The interpreter is very small. The interpreter 
+and MIPS binary combined are smaller than the compiled classfiles.
+Section 3.3 - Compiling to Java Source
+The next major step in Mips2JavaÕs development was the Java source compiler. This 
+generated Java source code (compliable with javac or Jikes) from a MIPS binary. 
+Generating Java source code was preferred initially over JVM bytecode for two reasons. 
+The authors werenÕt very familiar with JVM bytecode and therefore generating Java source 
+code was simpler. Generating source code also eliminated the need to do trivial 
+optimizations in the Mips2java compiler that javac and Jikes already do. This mainly 
+includes 2+2=4 stuff. For example, the MIPS register r0 is immutable and always 0. This 
+register is represented by a static final int in the Java source compiler. Javac and Jikes 
+automatically handle optimizing this away when possible. In the JVM bytecode compiler 
+these optimizations needs to be done in Mips2Java.
+Early versions of the Mips2Java compiler were very simple. All 32 MIPS GPRs and a 
+special PC register were fields in the generated java class. There was a run() method 
+containing all the instructions in the .text segment converted to Java source code. A switch 
+statement was used to allow jumps from instruction to instruction. The generated code 
+looked something like this.
+private final static int r0 = 0;
+private int r1, r2, r3,...,r30;
+private int r31 = 0xdeadbeef;
+private int pc = ENTRY_POINT;
+
+public void run() {
+	for(;;) {
+	switch(pc) {
+		case 0x10000:
+			r29 = r29 Ð 32;
+		case 0x10004:
+			r1 = r4 + r5;
+		case 0x10008:
+			if(r1 == r6) {
+				/* delay slot */
+				r1 = r1 + 1;
+				pc = 0x10018:
+				continue;
+			}
+		case 0x1000C:
+			r1 = r1 + 1;
+		case 0x10010:
+			r31 = 0x10018;
+			pc = 0x10210;
+			continue;
+		case 0x10014:
+			/* nop */
+		case 0x10018:
+			pc = r31;
+			continue;
+		...
+		case 0xdeadbeef:
+			System.err.println("Exited from ENTRY_POINT");
+			System.err.println("R2: " + r2);
+			System.exit(1);
+	}
+	}
+}
+
+This worked fine for small binaries but as soon as anything substantial was fed to it the 64k 
+JVM method size limit was soon hit. The solution to this was to break up the code into 
+many smaller methods. This required a trampoline to dispatch  jumps to the appropriate 
+method. With the addition of the trampoline the generated code looked something like this:
+public void run_0x10000() {
+	for(;;) {
+	switch(pc) {
+		case 0x10000:
+			...
+		case 0x10004:
+			...
+		...
+		case 0x10010:
+			r31 = 0x10018;
+			pc = 0x10210;
+			return;
+		...
+	}
+	}
+}
+
+pubic void run_0x10200() {
+	for(;;) {
+	switch(pc) {
+		case 0x10200:
+			...
+		case 0x10204:
+			...
+	}
+	}
+}
+
+public void trampoline() {
+	for(;;) {
+	switch(pc&0xfffffe00) {
+			case 0x10000: run_0x10000(); break;
+			case 0x10200: run_0x10200(); break;
+			case 0xdeadbe00:
+				...
+		}
+	}
+}
+With this trampoline in place somewhat large binaries could be handled without much 
+difficulty. There is no limit on the size of a classfile as a whole, just individual methods. 
+This method should scale well. However, there are other classfile limitations that will limit 
+the size of compiled binaries.
+Another interesting problem that was discovered while creating the trampoline method was 
+javac and JikesÕ incredible stupidity when dealing with switch statements. The follow code 
+fragment gets compiled into a lookupswich by javac:
+Switch(pc&0xffffff00) {
+	Case 0x00000100: run_100(); break;
+	Case 0x00000200: run_200(); break;
+	Case 0x00000300: run_300(); break;
+}
+while this nearly identical code fragment gets compiled to a tableswitch
+switch(pc>>>8) {
+	case 0x1: run_100(); break
+	case 0x2: run_200(); break;
+	case 0x3: run_300(); break;
+}
+Javac isnÕt smart enough to see the patter in the case values and generates very suboptimal 
+bytecode. Manually doing the shifts convinces javac to emit a tableswitch statement, which 
+is significantly faster. This change alone nearly doubled the speed of the compiled binary.
+Finding the optimal method size lead to the next big performance increase.  It was 
+determined with experimentation that the optimal number of MIPS instructions per method 
+is 128 (considering only power of two options). Going above or below that lead to 
+performance decreases. This is most likely due to a combination of two factors.
+_ The two levels  of switch statements jumps have to pass though Ð The first switch 
+statement jumps go through is the trampoline switch statement. This is implemented 
+as a TABLESWITCH in JVM bytecode so it is very fast. The second level switch 
+statement in the individual run_ methods is implemented as a LOOKUPSWITCH, 
+which is much slower. Using smaller methods puts more burden on the faster 
+TABLESWITCH and less on the slower LOOKUPSWITCH.
+_ JIT compilers probably favor smaller methods smaller methods are easier to compile 
+and are probably better candidates for JIT compilation than larger methods.
+FIXME: Put a chart here
+The next big optimization was eliminating unnecessary case statements. Having case 
+statements before each instruction prevents JIT compilers from being able to optimize 
+across instruction boundaries. In order to eliminate unnecessary case statements every 
+possible address that could be jumped to directly needed to be identified. The sources for 
+possible jump targets come from 3 places.
+_ The .text segment Ð Every instruction in the text segment in scanned for jump 
+targets. Every branch instruction (BEQ, JAL, etc) has its destination added to the list 
+of possible branch targets. In addition, functions that set the link register have 
+theirpc+8 added to the list (the address that wouldÕve been put to the link register). 
+Finally, combinations of LUI (Load Upper Immediate) of ADDIU (Add Immediate 
+Unsigned) are scanned for possible addresses in the text segment. This combination 
+of instructions is often used to load a 32-bit word into a register.
+_ The .data segment Ð When GCC generates switch() statements it often uses a jump 
+table stored in the .data segment. Unfortunately gcc doesnÕt identify these jump 
+tables in any way. Therefore, the entire .data segment is conservatively scanned for 
+possible addresses in the .text segment.
+_ The symbol table Ð This is mainly used as a backup. Scanning the .text and .data 
+segments should identify any possible jump targets but adding every function in the 
+symbol table in the ELF binary doesnÕt hurt. This will also catch functions that are 
+never called directly from the MIPS binary (for example, functions called with the 
+call() method in the runtime).
+Eliminating unnecessary case statements provided a 10-25% speed increase .
+Despite all the above optimizations and workaround an impossible to workaround hard 
+classfile limit was eventually hit, the constant pool. The constant pool in classfiles is limited 
+to 65536 entries. Every Integer with a magnitude greater than 32767 requires an entry in the 
+constant pool. Every time the compiler emits a jump/branch instruction the PC field is set to 
+the branch target. This means nearly every branch instruction requires an entry in the 
+constant pool. Large binaries hit this limit fairly quickly. One workaround that was 
+employed in the Java source compiler was to express constants as offsets from a few central 
+values. For example: "pc = N_0x00010000 + 0x10" where N_0x000100000 is a non-
+final field to prevent javac from inlining it. This was sufficient to get reasonable large 
+binaries to compile. It has a small (approximately 5%) performance impact on the generated 
+code. It also makes the generated classfile somewhat larger. Fortunately, the classfile 
+compiler eliminates this problem.
+3.4 Ð Bytecode compiler
+The next step in the evolution of Mips2Java was to compile directly to JVM bytecode 
+eliminating the intermediate javac step. This had several advantages
+_ There are little tricks that can be done in JVM bytecode that canÕt be done in Java 
+source code.
+_ Eliminates the time-consuming javac step Ð Javac takes a long time to parse and 
+compile the output from the java source compiler.
+_ Allows for MIPS binaries to be compiled and loaded into a running VM using a 
+class loader. This eliminates the need to compile the binaries ahead of time. 
+By generating code at the bytecode level  there are many areas where the compiler can be 
+smarter than javac. Most of the areas where improvements where made where in the 
+handling of branch instructions and in taking advantage of the JVM stack to eliminate 
+unnecessary LOADs and STOREs to local variables.
+The first obvious optimization that generating bytecode allows for is the use of GOTO. 
+Despite the fact that java doesnÕt have a GOTO keyword  a GOTO bytecode does exist and 
+is used heavily in the code generates by javac. Unfortunately the java language doesnÕt 
+provide any way to take advantage of this. As result of this jumps within a method were 
+implemented by setting the PC field to the new address and making a trip back to the initial 
+switch statement.  In the classfile compiler these jumps are implemented as GOTOs directly 
+to the target instruction. This saves a costly trip back through the LOOKUPSWITCH 
+statement and is a huge win for small loops within a method.
+Somewhat related to using GOTO is the ability to optimize branch statements. In the Java 
+source compiler branch statements are implemented as follows (delay slots are ignored for 
+the purpose of this example)
+If(condition) { pc = TARGET; continue; }
+This requires a branch in the JVM regardless of whether the MIPS branch is actually taken. 
+If condition is false the JVM has to jump over the code to set the PC and go back to the 
+switch block. If condition is true the JVM as to jump to the switch block. By generating 
+bytecode directly we can make the target of the JVM branch statement the actual bytecode 
+of the final destination. In the case where the branch isnÕt taken the JVM doesnÕt need to 
+branch at all.
+A side affect of the above two optimizations is a solution to the excess constant pool entries 
+problem. When jumps are implemented as GOTOs and direct branches to the target the PC 
+field doesnÕt need to be set. This eliminates many of the constant pool entries the java 
+source compiler requires. The limit is still there however, and given a large enough binary it 
+will still be reached.
+Delay slots are another area where things are done somewhat inefficiently in the Java source 
+compiler. In order to take advantage of instructions already in the pipeline MIPS cpu have a 
+"delay slot". That is, an instruction after a branch or jump instruction that is executed 
+regardless of whether the branch is taken. This is done because by the time the branch or 
+jump instruction is finished being processes the next instruction is already ready to be 
+executed and it is wasteful to discard it. (However, newer  MIPS CPUs have pipelines that 
+are much larger than early MIPS CPUs so they have to discard many instructions anyway.) 
+As a result of this the instruction in the delay slot is actually executed BEFORE the branch 
+is taken. To make things even more difficult, values from the register file are loaded 
+BEFORE the delay slot is executed.  Here is a small piece of MIPS assembly:
+ADDIU r2,r0,-1
+BLTZ r2, target
+ADDIU r2,r2,10
+...
+:target
+This piece of code is executed as follows
+1. r2 is set to Ð1
+2. r2 is loaded from the register file by the BLTEZ instruction
+3. 10 is added to r2 by the ADDIU instruction
+4. The branch is taken because at the time the BLTZ instruction was executed r2 was 
+Ð1, but r2 is now 9 (-1 + 10)
+There is a very element solution to this problem when using JVM bytecode. When a branch 
+instruction is encountered the registers needed for the comparison are pushed onto the stack 
+to prepare for the JVM branch instruction. Then, AFTER the values are on the stack the 
+delay slot is emitted, and then finally the actual JVM branch instruction. Because the values 
+were pushed to the stack before the delay slot was executed any changes the delay slot made 
+to the registers are not visible to the branch bytecode. This allows delay slots to be used 
+with no performance penalty or size penalty. 
+One final advantage that generating bytecode directly allows is smaller more compact 
+bytecode. All the optimization above lead to smaller bytecode as a side effect. There are also 
+a few other areas where the generated bytecode can be optimized for size with more 
+knowledge of the program as a whole.
+When encountering the following switch block both javac and Jikes generate redundant 
+bytecode.
+Switch(pc>>>8) {
+	Case 0x1: run_1(); break;
+	Case 0x2: run_2(); break
+	...
+	case 0x100: run_100(); break;
+}
+The first bytecode in each case arm in the switch statement is ALOAD_0 to prepare for a 
+invoke special call. By simple moving this outside the switch statement  each case arm was 
+reduced in size by one instruction. Similar optimizations were also done in other parts of the 
+compiler.
+
+Section 3 
+- Adam - The method is run(), not execute. Execute() is only used when you need to 
+resume from a pause syscall.
+
+Section 3.1
+- Adam - Even the R1000 supports LB/SB/LH/SH/LWL/LWR Ð saying it only supports 
+32-bit aligned loads is incorrect.
+- Other similarities
+o All the branching instructions in MIPS directly map to single JVM instructions.
+o Most of the ALU instructions map to single JVM instructions.
+
+(I can write up some stuff for each of these next several sections if you want)
+Section 3.2  - Interpreter
+- Originally written mainly to understand the MIPS instruction set
+- Far easier to debug than an ahead of time compiler (no waiting, can throw in quick 
+hacks like if(pc >= 0xbadc0de && pc <= 0xbadcfff) debugOutput() ), donÕt need to 
+understand most of the ELF format)
+- Still could be useful
+o for GDB remote debugging
+o cases where size is more important than speed (size of interpreter + size of mips 
+binary < size of compiled binary or size of compiler + mips binary)
+o code which dynamically generates code (JIT compilers, etc). The ahead of time 
+compiler canÕt possibly handle this
+
+Section 3.3 Ð Compiling to Java Source
+- First version of an ahead of time compiler
+- Huge performance boost
+- Java source output preferred for the 2+2=4 type optimizations java compilers do
+- Worked well for small binaries Ð large MIPS binaries required ridiculous amounts of 
+memory to compile and often created  invalid classfiles
+- Too many constants Ð every jump operation required an entry in the constant pool (pc = 
+0xabcd1234; continue; )
+
+Section 3.4 Ð Compiling directly to JVM Bytecode
+- Another jump in performance
+- More efficient code can be created at the bytecode level
+o Information can be stored on the JVM stack rather than in local variables
+_ Javac/jikes often unnecessarily use local variables
+long tmp = a*b;
+lo = (int)tmp;
+hi = (int)(tmp>>>32)
+does a store and two loads when a simple DUP would suffice
+o GOTO can be used to branch directly  to branch destinations in the same method 
+rather than going through the switch block again.
+o BEQ, BGTZ, BLE, etc can jump directly to destination rather than doing 
+if(condition) { pc=0xabcd1234; continue; }
+o Eliminates excess constant pool entries (only jump outside the current method 
+require a constant pool entry)
+o Delay slots implemented more efficiently.
+_ Java source compiler does:
+if(condition) { /* delay slot /; pc = 0xabcd1234; continue; }
+/* delay slot again */
+_ This is required because the delay slot can change the registers used in 
+condition. The registers need to be read BEFORE the delay slot in executed.
+_ In the bytecode compiler the registers used in the condition are pushed to the 
+stack, then the delay slot is executed, and finally the comparison is done. 
+This eliminates the needs to output the delay slot twice.
+- Smaller bytecode
+o Everything mentioned above makes it smaller and faster
+o Javac/jikes add redundant code
+_ Switch(a) {
+   Case 1: Run_1000(); break;
+   Case 2: run_2000(); break;
+}
+Gets compiled into
+1 ÐTableswitch É. 
+2 Ð ALOAD_0
+3 Ð invokespecial  run_1000
+4 Ð goto end
+5 Ð ALOAD_0
+6 Ð invokespecial run_2000
+10 Ð goto end
+ALOAD_0 is unnecessarily put in each switch arm
+
+3.5 Interfacing with Java Code
+- _call_java ()/Runtime.call()
+o _call_java () - Call a java method from mips
+o Runime.call() Ð call a mips method from java
+o Easily allocate memory within the binaryÕs memory space by calling libcÕs malloc()
+o Can go back and forth between mips and java (java calls mips which calls java which 
+calls back into mips)
+- Java Strings can easily be converted to and from null terminated strings in the processÕ 
+memory space
+- Java InputStreams, OutputStreams, and Sockets can easily be turned in to standard 
+UNIX file descriptors (and ANSI FILE*s)
+- Can easily create custom filedescriptors and have full control over all operations on 
+them (read, write, seek, close, fstat, etc)
+- Int User_info[] Ð optional chunk of memory can very easily be accessed from java 
+(Runtime.getUserInfo/setUserInfo)
+
+3.6 Virtualization
+- Adam Ð we actually donÕt support sockets directly yet Ð you should probably take that 
+out. (But you can create a socket in java and expose it to mips as a filedescriptor)
+- Runtime services
+o Provides a easy to use interface to subclasses (Interpreter or compiles binaries)
+_ Subclasses only know how to execute instructions
+_ Runtime handles setting up registers/stack for execution to begin and 
+extracting return values and the exit status from the process
+o Memory management 
+_ Sets up stack and guard pages
+_ Allocates pages with the sbrk syscall
+_ Provide easy an memcpy like interface for accessing the processes memory 
+from java
+o Runtime.call() support Ð sets up registers,etc to prepare the process for a call into it 
+from java
+o Filesystem Ð open/close/read/write/seek/fcntl s syscalls
+o Time related functions Ð sleep, gettimeofday, times syscall
+o UnixRuntime provides a more complete unix-like environment (Runtime smaller 
+though)
+_ Supports fork() and waitpid()
+_ Pipe() for IPC
+_ More advocated filesystem interface
+_ All filesystem operations abstracted away into a FileSystem class
+o FileSystem class can be written that exposes a zip file, 
+directory on an http server, etc as a filesystem
+_ Chdir/getcwd
+_ /dev filesystem
+_ stat()
+_ directory listing support
+5.1 Charts
+IÕll put together some charts tonight
+5.2 Optimizations
+And finish this part
+
+Let me know if this was what you were looking for
+
+                                          libjpeg  libmspack libfreetype
+Interpreted MIPS Binary                   22.2      12.9      21.4
+Compled MIPS Binary (fastest options)     3.39      2.23      4.31
+Native -O3                                0.235    0.084     0.201
+
+Compled - with all case statements        3.50      2.30      4.99
+Compiled - with pruned case statement     3.39      2.23      4.31
+
+Compiled - 512 instructions/method        62.7      27.7      56.9
+Compiled - 256 instructions/method        3.54      2.55      4.43
+Compiled - 128 instructions/method        3.39      2.23      4.31
+Compiled - 64 instructions/method         3.56      2.31      4.40
+Compiled - 32 instruction/method          3.71      2.46      4.64
+
+Compild MIPS Binary (Server VM)           3.21      2.00      4.54
+Compiled MIPS Binary (Client VM)          3.39      2.23      4.31
+
+All times are measured in seconds. These were all run on a dual 1ghz G4
+running OS X 10.3.1 with Apple's latest VM (JDK 1.4.1_01-27). Each test
+was run 8 times within a single VM. The highest and lowest times were
+removed and the remaining 6 were averaged. In each case only the first
+run differed significantly from the rest.
+
+The libjpeg test consisted of decoding a 1280x1024 jpeg
+(thebride_1280.jpg) and writing a tga. The mspack test consisted of
+extracting all members from arial32.exe, comic32.exe, times32.exe, and
+verdan32.exe. The freetype test consisted of rendering characters
+32-127 of Comic.TTF at sizes from 8 to 48 incrementing by 4. (That is
+about 950 individual glyphs).
+
+I can provide you with the source for any of these test if you'd like.
+
+-Brian
+\end{verbatim}
+
+\end{document}
+
diff --git a/doc/performance.results.txt b/doc/performance.results.txt
deleted file mode 100644
index f35f050..0000000
--- a/doc/performance.results.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Let me know if this was what you were looking for
-
-                                          libjpeg  libmspack libfreetype
-Interpreted MIPS Binary                   22.2      12.9      21.4
-Compled MIPS Binary (fastest options)     3.39      2.23      4.31
-Native -O3                                0.235    0.084     0.201
-
-Compled - with all case statements        3.50      2.30      4.99
-Compiled - with pruned case statement     3.39      2.23      4.31
-
-Compiled - 512 instructions/method        62.7      27.7      56.9
-Compiled - 256 instructions/method        3.54      2.55      4.43
-Compiled - 128 instructions/method        3.39      2.23      4.31
-Compiled - 64 instructions/method         3.56      2.31      4.40
-Compiled - 32 instruction/method          3.71      2.46      4.64
-
-Compild MIPS Binary (Server VM)           3.21      2.00      4.54
-Compiled MIPS Binary (Client VM)          3.39      2.23      4.31
-
-All times are measured in seconds. These were all run on a dual 1ghz G4
-running OS X 10.3.1 with Apple's latest VM (JDK 1.4.1_01-27). Each test
-was run 8 times within a single VM. The highest and lowest times were
-removed and the remaining 6 were averaged. In each case only the first
-run differed significantly from the rest.
-
-The libjpeg test consisted of decoding a 1280x1024 jpeg
-(thebride_1280.jpg) and writing a tga. The mspack test consisted of
-extracting all members from arial32.exe, comic32.exe, times32.exe, and
-verdan32.exe. The freetype test consisted of rendering characters
-32-127 of Comic.TTF at sizes from 8 to 48 incrementing by 4. (That is
-about 950 individual glyphs).
-
-I can provide you with the source for any of these test if you'd like.
-
--Brian
-
-- 
1.7.10.4