From: simonm Date: Fri, 5 Jun 1998 14:43:45 +0000 (+0000) Subject: [project @ 1998-06-05 14:43:44 by simonm] X-Git-Tag: Approx_2487_patches~601 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=a343265d8f6bf27a84bb16f68f9242b346dc5095;p=ghc-hetmet.git [project @ 1998-06-05 14:43:44 by simonm] Import GMP 2.0.2 --- diff --git a/ghc/rts/gmp/mpn/a29k/add_n.s b/ghc/rts/gmp/mpn/a29k/add_n.s new file mode 100644 index 0000000..74c20e3 --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/add_n.s @@ -0,0 +1,120 @@ +; 29000 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; s2_ptr lr4 +; size lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. + +; The 29k has no addition or subtraction instructions that don't +; affect carry, so we need to save and restore that as soon as we +; adjust the pointers. gr116 is used for this purpose. Note that +; gr116==0 means that carry should be set. 
+ + .sect .lit,lit + .text + .align 4 + .global ___mpn_add_n + .word 0x60000 +___mpn_add_n: + srl gr117,lr5,3 + sub gr118,gr117,1 + jmpt gr118,Ltail + constn gr116,-1 ; init cy reg: -1 = no carry pending + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr104,lr4 + add lr4,lr4,32 + + subr gr116,gr116,0 ; restore carry + addc gr96,gr96,gr104 + addc gr97,gr97,gr105 + addc gr98,gr98,gr106 + addc gr99,gr99,gr107 + addc gr100,gr100,gr108 + addc gr101,gr101,gr109 + addc gr102,gr102,gr110 + addc gr103,gr103,gr111 + subc gr116,gr116,gr116 ; gr116 = not(cy): 0 if carry set, else -1 + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +; Code for the last up-to-7 limbs. +; This code might look very strange, but it's hard to write it +; differently without major slowdown. + + and lr5,lr5,(8-1) +Ltail: sub gr118,lr5,1 ; count for CR + jmpt gr118,Lend + sub gr117,lr5,2 ; count for jmpfdec + + mtsr cr,gr118 + loadm 0,0,gr96,lr3 + mtsr cr,gr118 + loadm 0,0,gr104,lr4 + + subr gr116,gr116,0 ; restore carry + + jmpfdec gr117,L1 + addc gr96,gr96,gr104 + jmp Lstore + mtsr cr,gr118 +L1: jmpfdec gr117,L2 + addc gr97,gr97,gr105 + jmp Lstore + mtsr cr,gr118 +L2: jmpfdec gr117,L3 + addc gr98,gr98,gr106 + jmp Lstore + mtsr cr,gr118 +L3: jmpfdec gr117,L4 + addc gr99,gr99,gr107 + jmp Lstore + mtsr cr,gr118 +L4: jmpfdec gr117,L5 + addc gr100,gr100,gr108 + jmp Lstore + mtsr cr,gr118 +L5: jmpfdec gr117,L6 + addc gr101,gr101,gr109 + jmp Lstore + mtsr cr,gr118 +L6: addc gr102,gr102,gr110 ; NOTE(review): only exit that reaches Lstore without reloading CR -- confirm CR is still valid here + +Lstore: storem 0,0,gr96,lr2 + subc gr116,gr116,gr116 ; gr116 = not(cy): 0 if carry set, else -1 + +Lend: jmpi lr0 + add gr96,gr116,1 ; gr96 = gr116 + 1: 1 if carry out, else 0 diff --git a/ghc/rts/gmp/mpn/a29k/addmul_1.s b/ghc/rts/gmp/mpn/a29k/addmul_1.s new 
file mode 100644 index 0000000..8c0ec96 --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/addmul_1.s @@ -0,0 +1,113 @@ +; 29000 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +; add the product to a second limb 
vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___mpn_addmul_1 + .word 0x60000 +___mpn_addmul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; (size - 8) / 8 = number of further 8-limb chunks + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + loadm 
0,0,gr96,lr2 + + add gr104,gr96,gr104 + addc 
gr105,gr97,gr105 + addc gr106,gr98,gr106 + addc gr107,gr99,gr107 + addc gr108,gr100,gr108 + addc gr109,gr101,gr109 + addc gr110,gr102,gr110 + addc gr111,gr103,gr111 + addc gr120,gr120,0 + + mtsrim cr,(8-1) + storem 0,0,gr104,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; remaining limbs - 1 (negative: nothing left) + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + sub lr2,lr2,4 ; offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu gr118,gr116,lr5 + add lr2,lr2,4 + load 0,0,gr119,lr2 + add gr117,gr117,gr120 + addc gr118,gr118,0 + add gr117,gr117,gr119 + store 0,0,gr117,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + +Lend: jmpi lr0 + or gr96,gr120,0 ; return the carry limb: gr96 = gr120 diff --git a/ghc/rts/gmp/mpn/a29k/lshift.s b/ghc/rts/gmp/mpn/a29k/lshift.s new file mode 100644 index 0000000..7554e2c --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/lshift.s @@ -0,0 +1,93 @@ +; 29000 __mpn_lshift -- Shift a limb vector left and store the result in a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; cnt lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. + + .sect .lit,lit + .text + .align 4 + .global ___mpn_lshift + .word 0x60000 +___mpn_lshift: + sll gr116,lr4,2 + add lr3,gr116,lr3 + add lr2,gr116,lr2 + sub lr3,lr3,4 + load 0,0,gr119,lr3 + + subr gr116,lr5,32 + srl gr96,gr119,gr116 ; return value: bits shifted out of the most significant limb + sub lr4,lr4,1 ; actual loop count is SIZE - 1 + + srl gr117,lr4,3 ; chunk count = (actual count) / 8 + cpeq gr118,gr117,0 + jmpt gr118,Ltail + mtsr fc,lr5 + + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: sub lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr100,lr3 + + extract gr109,gr119,gr107 + extract gr108,gr107,gr106 + extract gr107,gr106,gr105 + extract gr106,gr105,gr104 + extract gr105,gr104,gr103 + extract gr104,gr103,gr102 + extract gr103,gr102,gr101 + extract gr102,gr101,gr100 + + sub lr2,lr2,32 + mtsrim cr,(8-1) + storem 0,0,gr102,lr2 + jmpfdec gr117,Loop + or gr119,gr100,0 + +; Code for the last up-to-7 limbs. + + and lr4,lr4,(8-1) +Ltail: cpeq gr118,lr4,0 + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + +Loop2: sub lr3,lr3,4 + load 0,0,gr116,lr3 + extract gr117,gr119,gr116 + sub lr2,lr2,4 + store 0,0,gr117,lr2 + jmpfdec lr4,Loop2 + or gr119,gr116,0 + +Lend: extract gr117,gr119,0 + sub lr2,lr2,4 + jmpi lr0 + store 0,0,gr117,lr2 diff --git a/ghc/rts/gmp/mpn/a29k/mul_1.s b/ghc/rts/gmp/mpn/a29k/mul_1.s new file mode 100644 index 0000000..5d120f4 --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/mul_1.s @@ -0,0 +1,97 @@ +; 29000 __mpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___mpn_mul_1 + .word 0x60000 +___mpn_mul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; (size - 8) / 8 = number of further 8-limb chunks + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + storem 0,0,gr104,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; remaining limbs - 1 
(negative: nothing left) + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + sub lr2,lr2,4 ; 
offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu gr118,gr116,lr5 + add lr2,lr2,4 + add gr117,gr117,gr120 + store 0,0,gr117,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + +Lend: jmpi lr0 + or gr96,gr120,0 ; return the carry limb: gr96 = gr120 diff --git a/ghc/rts/gmp/mpn/a29k/rshift.s b/ghc/rts/gmp/mpn/a29k/rshift.s new file mode 100644 index 0000000..fe53b71 --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/rshift.s @@ -0,0 +1,89 @@ +; 29000 __mpn_rshift -- Shift a limb vector right and store the result in a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; cnt lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. 
+ + .sect .lit,lit + .text + .align 4 + .global ___mpn_rshift + .word 0x60000 +___mpn_rshift: + load 0,0,gr119,lr3 + add lr3,lr3,4 + + subr gr116,lr5,32 + sll gr96,gr119,gr116 ; return value: bits shifted out of the least significant limb + sub lr4,lr4,1 ; actual loop count is SIZE - 1 + + srl gr117,lr4,3 ; chunk count = (actual count) / 8 + cpeq gr118,gr117,0 + jmpt gr118,Ltail + mtsr fc,gr116 + + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: mtsrim cr,(8-1) + loadm 0,0,gr100,lr3 + add lr3,lr3,32 + + extract gr98,gr100,gr119 + extract gr99,gr101,gr100 + extract gr100,gr102,gr101 + extract gr101,gr103,gr102 + extract gr102,gr104,gr103 + extract gr103,gr105,gr104 + extract gr104,gr106,gr105 + extract gr105,gr107,gr106 + + mtsrim cr,(8-1) + storem 0,0,gr98,lr2 + add lr2,lr2,32 + jmpfdec gr117,Loop + or gr119,gr107,0 + +; Code for the last up-to-7 limbs. + + and lr4,lr4,(8-1) +Ltail: cpeq gr118,lr4,0 + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + +Loop2: load 0,0,gr100,lr3 + add lr3,lr3,4 + extract gr117,gr100,gr119 + store 0,0,gr117,lr2 + add lr2,lr2,4 + jmpfdec lr4,Loop2 + or gr119,gr100,0 + +Lend: srl gr117,gr119,lr5 + jmpi lr0 + store 0,0,gr117,lr2 diff --git a/ghc/rts/gmp/mpn/a29k/sub_n.s b/ghc/rts/gmp/mpn/a29k/sub_n.s new file mode 100644 index 0000000..3c8d610 --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/sub_n.s @@ -0,0 +1,120 @@ +; 29000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; s2_ptr lr4 +; size lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs/per iteration, until less than 8 limbs remain. + +; The 29k has no addition or subtraction instructions that don't +; affect carry, so we need to save and restore that as soon as we +; adjust the pointers. gr116 is used for this purpose. Note that +; gr116==0 means no borrow is pending and gr116==-1 means a borrow is pending. + + .sect .lit,lit + .text + .align 4 + .global ___mpn_sub_n + .word 0x60000 +___mpn_sub_n: + srl gr117,lr5,3 + sub gr118,gr117,1 + jmpt gr118,Ltail + const gr116,0 ; init cy reg: 0 = no borrow pending (the old -1 made the first subc borrow) + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr104,lr4 + add lr4,lr4,32 + + subr gr116,gr116,0 ; restore carry + subc gr96,gr96,gr104 + subc gr97,gr97,gr105 + subc gr98,gr98,gr106 + subc gr99,gr99,gr107 + subc gr100,gr100,gr108 + subc gr101,gr101,gr109 + subc gr102,gr102,gr110 + subc gr103,gr103,gr111 + subc gr116,gr116,gr116 ; gr116 = gr116 - gr116 - borrow: 0 if no borrow, -1 if borrow + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +; Code for the last up-to-7 limbs. +; This code might look very strange, but it's hard to write it +; differently without major slowdown. 
+ + and lr5,lr5,(8-1) +Ltail: sub gr118,lr5,1 ; count for CR + jmpt gr118,Lend + sub gr117,lr5,2 ; count for jmpfdec + + mtsr cr,gr118 + loadm 0,0,gr96,lr3 + mtsr cr,gr118 + loadm 0,0,gr104,lr4 + + subr gr116,gr116,0 ; restore carry + + jmpfdec gr117,L1 + subc gr96,gr96,gr104 + jmp Lstore + mtsr cr,gr118 +L1: jmpfdec gr117,L2 + subc gr97,gr97,gr105 + jmp Lstore + mtsr cr,gr118 +L2: jmpfdec gr117,L3 + subc gr98,gr98,gr106 + jmp Lstore + mtsr cr,gr118 +L3: jmpfdec gr117,L4 + subc gr99,gr99,gr107 + jmp Lstore + mtsr cr,gr118 +L4: jmpfdec gr117,L5 + subc gr100,gr100,gr108 + jmp Lstore + mtsr cr,gr118 +L5: jmpfdec gr117,L6 + subc gr101,gr101,gr109 + jmp Lstore + mtsr cr,gr118 +L6: subc gr102,gr102,gr110 ; NOTE(review): only exit that reaches Lstore without reloading CR -- confirm CR is still valid here + +Lstore: storem 0,0,gr96,lr2 + subc gr116,gr116,gr116 ; gr116 = gr116 - gr116 - borrow: 0 if no borrow, -1 if borrow + +Lend: jmpi lr0 + subr gr96,gr116,0 ; return the borrow: gr96 = 0 - gr116 (1 if borrow, else 0) diff --git a/ghc/rts/gmp/mpn/a29k/submul_1.s b/ghc/rts/gmp/mpn/a29k/submul_1.s new file mode 100644 index 0000000..ca2ef72 --- /dev/null +++ b/ghc/rts/gmp/mpn/a29k/submul_1.s @@ -0,0 +1,116 @@ +; 29000 __mpn_submul_1 -- Multiply a limb vector with a single limb and +; subtract the product from a second limb vector. + +; Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. 
+ +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___mpn_submul_1 + .word 0x60000 +___mpn_submul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; (size - 8) / 8 = number of further 8-limb chunks + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + loadm 0,0,gr96,lr2 + + sub gr96,gr96,gr104 + subc gr97,gr97,gr105 + subc gr98,gr98,gr106 + subc gr99,gr99,gr107 + subc gr100,gr100,gr108 + subc gr101,gr101,gr109 + subc gr102,gr102,gr110 + subc gr103,gr103,gr111 + + subc gr104,gr104,gr104 ; gr104 = 0 if the chain ended without borrow, -1 if it borrowed (the old add-back missed a borrow when the result limb was 0 with a borrow-in) + sub gr120,gr120,gr104 ; add that borrow to the carry limb + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; remaining limbs - 1 (negative: nothing left) + jmpt gr118,Lend + sub lr4,lr4,2 ; count for jmpfdec + sub lr2,lr2,4 ; offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu 
gr118,gr116,lr5 + add lr2,lr2,4 + load 0,0,gr119,lr2 + add gr117,gr117,gr120 + addc gr118,gr118,0 + sub gr119,gr119,gr117 + add gr104,gr119,gr117 ; invert carry from previous sub (exact here: that sub had no borrow-in) + store 0,0,gr119,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + 
+Lend: jmpi lr0 + or gr96,gr120,0 ; return the carry limb: gr96 = gr120