X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fgmp%2Fmpn%2Fpa64w%2Fsubmul_1.S;fp=rts%2Fgmp%2Fmpn%2Fpa64w%2Fsubmul_1.S;h=0000000000000000000000000000000000000000;hb=fdf1cd0399158308769fcb2ab7e46e215a68d865;hp=294f6239b2b1bd24670de97ddefe91a7b616a1e0;hpb=e552cfc427d2734b9a9629f2ab1d22f493e775f6;p=ghc-hetmet.git

diff --git a/rts/gmp/mpn/pa64w/submul_1.S b/rts/gmp/mpn/pa64w/submul_1.S
deleted file mode 100644
index 294f623..0000000
--- a/rts/gmp/mpn/pa64w/submul_1.S
+++ /dev/null
@@ -1,171 +0,0 @@
-; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and
-; subtract the result from a second limb vector.
-
-; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
-
-; This file is part of the GNU MP Library.
-
-; The GNU MP Library is free software; you can redistribute it and/or modify
-; it under the terms of the GNU Lesser General Public License as published by
-; the Free Software Foundation; either version 2.1 of the License, or (at your
-; option) any later version.
-
-; The GNU MP Library is distributed in the hope that it will be useful, but
-; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-; License for more details.
-
-; You should have received a copy of the GNU Lesser General Public License
-; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-; MA 02111-1307, USA.
-
-; INPUT PARAMETERS
-#define rptr	%r26
-#define sptr	%r25
-#define size	%r24
-#define s2limb	%r23
-
-; This runs at 11 cycles/limb on a PA8000.  It might be possible to make
-; it faster, but the PA8000 pipeline is not publicly documented and it
-; is very complex to reverse engineer.
-
-#define t1	%r19
-#define rlimb	%r20
-#define hi	%r21
-#define lo	%r22
-#define m0	%r28
-#define m1	%r3
-#define cylimb	%r29
-#define t3	%r4
-#define t2	%r6
-#define t5	%r23
-#define t4	%r31
-	.level	2.0w
-	.code
-	.export	__gmpn_submul_1,entry
-__gmpn_submul_1
-	.proc
-	.callinfo frame=128,no_calls
-	.entry
-	std	s2limb,-56(%r30)
-	fldd	-56(%r30),%fr5
-	ldo	128(%r30),%r30
-	add	%r0,%r0,cylimb		; clear cy and cylimb
-
-	std	%r3,-96(%r30)
-	std	%r4,-88(%r30)
-	std	%r5,-80(%r30)
-	std	%r6,-72(%r30)
-	depdi,z	1,31,1,%r5
-
-	fldd	0(sptr),%fr4
-	ldo	8(sptr),sptr
-
-	xmpyu	%fr5R,%fr4R,%fr6
-	fstd	%fr6,-128(%r30)
-	xmpyu	%fr5R,%fr4L,%fr7
-	fstd	%fr7,-120(%r30)
-	xmpyu	%fr5L,%fr4R,%fr8
-	fstd	%fr8,-112(%r30)
-	xmpyu	%fr5L,%fr4L,%fr9
-	fstd	%fr9,-104(%r30)
-	ldd	-128(%r30),lo		; lo = low 64 bit of product
-	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
-	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
-	ldd	-104(%r30),hi		; hi = high 64 bit of product
-	addib,=	-1,%r24,L$end1
-	nop
-	fldd	0(sptr),%fr4
-	ldo	8(sptr),sptr
-	addib,=	-1,%r24,L$end2
-	nop
-L$loop
-	xmpyu	%fr5R,%fr4R,%fr6
-	fstd	%fr6,-128(%r30)
-	xmpyu	%fr5R,%fr4L,%fr7
-	fstd	%fr7,-120(%r30)
-	xmpyu	%fr5L,%fr4R,%fr8
-	fstd	%fr8,-112(%r30)
-	xmpyu	%fr5L,%fr4L,%fr9
-	fstd	%fr9,-104(%r30)
-	ldd	0(rptr),rlimb
-	extrd,u	lo,31,32,t1	; t1 = hi32(lo)
-	extrd,u	lo,63,32,t4	; t4 = lo32(lo)
-	add,l	m0,t1,t1	; t1 += m0
-	add,l,*nuv	m1,t1,t1	; t1 += m1
-	add,l	%r5,hi,hi	; propagate carry
-	extrd,u	t1,31,32,t2	; t2 = hi32(t1)
-	depd,z	t1,31,32,t5	; t5 = lo32(t1)
-	add,l	t5,t4,t4	; t4 += lo32(t1)
-	ldd	-128(%r30),lo	; lo = low 64 bit of product
-	add	cylimb,t4,t4
-	ldd	-120(%r30),m0	; m0 = mid0 64 bit of product
-	add,dc	t2,hi,cylimb
-	ldd	-112(%r30),m1	; m1 = mid1 64 bit of product
-	sub	rlimb,t4,t3
-	add	t4,t3,%r0
-	ldd	-104(%r30),hi	; hi = high 64 bit of product
-	add,dc	%r0,cylimb,cylimb
-	fldd	0(sptr),%fr4
-	ldo	8(sptr),sptr
-	std	t3,0(rptr)
-	addib,<>	-1,%r24,L$loop
-	ldo	8(rptr),rptr
-L$end2
-	xmpyu	%fr5R,%fr4R,%fr6
-	fstd	%fr6,-128(%r30)
-	xmpyu	%fr5R,%fr4L,%fr7
-	fstd	%fr7,-120(%r30)
-	xmpyu	%fr5L,%fr4R,%fr8
-	fstd	%fr8,-112(%r30)
-	xmpyu	%fr5L,%fr4L,%fr9
-	fstd	%fr9,-104(%r30)
-	ldd	0(rptr),rlimb
-	extrd,u	lo,31,32,t1	; t1 = hi32(lo)
-	extrd,u	lo,63,32,t4	; t4 = lo32(lo)
-	add,l	m0,t1,t1	; t1 += m0
-	add,l,*nuv	m1,t1,t1	; t1 += m1
-	add,l	%r5,hi,hi	; propagate carry
-	extrd,u	t1,31,32,t2	; t2 = hi32(t1)
-	depd,z	t1,31,32,t5	; t5 = lo32(t1)
-	add,l	t5,t4,t4	; t4 += lo32(t1)
-	ldd	-128(%r30),lo	; lo = low 64 bit of product
-	add	cylimb,t4,t4
-	ldd	-120(%r30),m0	; m0 = mid0 64 bit of product
-	add,dc	t2,hi,cylimb
-	ldd	-112(%r30),m1	; m1 = mid1 64 bit of product
-	sub	rlimb,t4,t3
-	add	t4,t3,%r0
-	ldd	-104(%r30),hi	; hi = high 64 bit of product
-	add,dc	%r0,cylimb,cylimb
-	std	t3,0(rptr)
-	ldo	8(rptr),rptr
-L$end1
-	ldd	0(rptr),rlimb
-	extrd,u	lo,31,32,t1	; t1 = hi32(lo)
-	extrd,u	lo,63,32,t4	; t4 = lo32(lo)
-	add,l	m0,t1,t1	; t1 += m0
-	add,l,*nuv	m1,t1,t1	; t1 += m1
-	add,l	%r5,hi,hi	; propagate carry
-	extrd,u	t1,31,32,t2	; t2 = hi32(t1)
-	depd,z	t1,31,32,t5	; t5 = lo32(t1)
-	add,l	t5,t4,t4	; t4 += lo32(t1)
-	add	cylimb,t4,t4
-	add,dc	t2,hi,cylimb
-	sub	rlimb,t4,t3
-	add	t4,t3,%r0
-	add,dc	%r0,cylimb,cylimb
-	std	t3,0(rptr)
-	ldo	8(rptr),rptr
-
-	ldd	-96(%r30),%r3
-	ldd	-88(%r30),%r4
-	ldd	-80(%r30),%r5
-	ldd	-72(%r30),%r6
-
-	copy	cylimb,%r28
-	bve	(%r2)
-	.exit
-	ldo	-128(%r30),%r30
-	.procend
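
For readers following the deleted routine: __gmpn_submul_1 implements the standard mpn primitive that subtracts s2limb * {sptr, size} from {rptr, size} and returns the final borrow. Below is a minimal C sketch of those semantics only; it is not GMP's code, and the helper name submul_1_ref and the use of GCC's unsigned __int128 are assumptions for illustration.

#include <stdint.h>
#include <stddef.h>

typedef uint64_t mp_limb;

/* Hedged reference sketch of __gmpn_submul_1's semantics (not GMP's
   implementation): rptr[i] -= s2limb * sptr[i], propagating borrows,
   and return the borrow out of the top limb. */
mp_limb submul_1_ref(mp_limb *rptr, const mp_limb *sptr, size_t size,
                     mp_limb s2limb)
{
    mp_limb cylimb = 0;                      /* borrow carried between limbs */
    for (size_t i = 0; i < size; i++) {
        unsigned __int128 prod = (unsigned __int128)sptr[i] * s2limb;
        mp_limb pl  = (mp_limb)prod;         /* low 64 bits of the product   */
        mp_limb ph  = (mp_limb)(prod >> 64); /* high 64 bits of the product  */
        mp_limb sub = pl + cylimb;           /* amount to subtract this limb */
        ph += (sub < pl);                    /* carry out of pl + cylimb     */
        mp_limb r = rptr[i];
        rptr[i] = r - sub;
        cylimb = ph + (r < sub);             /* borrow out of the subtract   */
    }
    return cylimb;
}

The assembly's sub rlimb,t4,t3 / add t4,t3,%r0 / add,dc pairs compute exactly this: the throwaway add regenerates the carry flag from the subtraction so the borrow can be folded into cylimb with add-with-carry.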
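The four xmpyu instructions build the 128-bit product from 32x32->64 partial products (lo, m0, m1, hi in the register comments), and the extrd/depd sequence recombines them. Note that depdi,z 1,31,1,%r5 sets %r5 = 1 << 32: when the mid-product sum carries out of 64 bits, add,l %r5,hi,hi credits that carry (worth 2^32 at hi's weight) to the high word, and the ,*nuv completer nullifies that fixup when no carry occurred. A hedged C sketch of the same recombination; mul_64x64_128 is an illustrative name, not a GMP function:

#include <stdint.h>

/* Sketch of the 4-partial-product scheme used by the xmpyu sequence:
   a 64x64->128 multiply assembled from four 32x32->64 products. */
static void mul_64x64_128(uint64_t a, uint64_t b,
                          uint64_t *plo, uint64_t *phi)
{
    uint64_t aL = (uint32_t)a, aH = a >> 32;
    uint64_t bL = (uint32_t)b, bH = b >> 32;

    uint64_t lo = aL * bL;             /* xmpyu %fr5R,%fr4R */
    uint64_t m0 = aL * bH;             /* xmpyu %fr5R,%fr4L */
    uint64_t m1 = aH * bL;             /* xmpyu %fr5L,%fr4R */
    uint64_t hi = aH * bH;             /* xmpyu %fr5L,%fr4L */

    uint64_t t1 = (lo >> 32) + m0;     /* hi32(lo) + m0: cannot overflow    */
    t1 += m1;                          /* may carry out by exactly one bit  */
    if (t1 < m1)                       /* that carry is worth 2^32 in hi    */
        hi += (uint64_t)1 << 32;       /* the add,l %r5,hi,hi fixup         */

    *plo = (uint32_t)lo | (t1 << 32);  /* lo32(lo) plus lo32(t1) shifted up */
    *phi = hi + (t1 >> 32);            /* hi32(t1) lands in the high limb   */
}

The hi32(lo) + m0 sum cannot overflow because hi32(lo) < 2^32 and m0 <= (2^32 - 1)^2, which is why the assembly can use plain add,l there and reserve the nullified fixup for the second mid-product addition.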