+++ /dev/null
-dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl
-dnl P5: 2.375 cycles/limb
-
-
-dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
-dnl Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 2.1 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with the GNU MP Library; see the file COPYING.LIB. If
-dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
-dnl Suite 330, Boston, MA 02111-1307, USA.
-
-
-include(`../config.m4')
-
-
-C Select the operation at m4 time.  With OPERATION_add_n defined, M4_inst
-C becomes adcl and the entry points are mpn_add_n and mpn_add_nc.  With
-C OPERATION_sub_n defined, M4_inst becomes sbbl and the entry points are
-C mpn_sub_n and mpn_sub_nc.  If neither symbol is defined, m4_error is
-C invoked with the message given below.
-ifdef(`OPERATION_add_n',`
- define(M4_inst, adcl)
- define(M4_function_n, mpn_add_n)
- define(M4_function_nc, mpn_add_nc)
-
-',`ifdef(`OPERATION_sub_n',`
- define(M4_inst, sbbl)
- define(M4_function_n, mpn_sub_n)
- define(M4_function_nc, mpn_sub_nc)
-
-',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
-')')')
-
-C Lists the four function names this one source can provide.
-C NOTE(review): MULFUNC_PROLOGUE is defined outside this file (presumably
-C reached through config.m4) -- confirm its exact effect there.
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-
-C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t carry);
-
-C Names for the stack parameters.  The numbers rise in steps of 4 in the
-C same order as the C prototype arguments: dst, src1, src2, size, carry.
-C PARAM_CARRY is read only by the M4_function_nc entry point.
-C NOTE(review): defframe is defined outside this file; presumably each
-C number is a byte offset from the stack pointer at function entry, and the
-C generated operand is corrected by the current FRAME value -- confirm.
-defframe(PARAM_CARRY,20)
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_SRC2, 12)
-defframe(PARAM_SRC1, 8)
-defframe(PARAM_DST, 4)
-
-C M4_function_nc -- entry point that takes an initial carry (or borrow) in
-C PARAM_CARRY.  It repeats the register setup of M4_function_n, moves bit 0
-C of PARAM_CARRY into the carry flag, and then jumps into the body of
-C M4_function_n (labels oop and end), which does the arithmetic, restores
-C the saved registers and returns.  There is no ret in this block.
-C
-C Register use once setup is complete:
-C   edi = dst, esi = src1, ebp = src2
-C   ebx = src2 limb fetched ahead of its use
-C   ecx = (size-1) >> 3, count of 8-limb loop passes
-C   edx = (size-1) & 7, count of single-limb tail passes
-C NOTE(review): (%ebp) is read unconditionally, so size >= 1 appears to be
-C assumed -- confirm against the callers.
- .text
- ALIGN(8)
-PROLOGUE(M4_function_nc)
-
-C Same four pushes, in the same order, as M4_function_n, whose epilogue
-C pops them again.
- pushl %edi
- pushl %esi
- pushl %ebx
- pushl %ebp
-C Four pushes make 16 bytes.  NOTE(review): FRAME presumably lets the
-C PARAM_ names compensate for the moved stack pointer -- confirm.
-deflit(`FRAME',16)
-
- movl PARAM_DST,%edi
- movl PARAM_SRC1,%esi
- movl PARAM_SRC2,%ebp
- movl PARAM_SIZE,%ecx
-
-C Fetch the first src2 limb ahead of time.
- movl (%ebp),%ebx
-
-C Split size-1 into 8-limb passes (ecx) and leftover single limbs (edx).
-C The one limb not counted here is handled at label end2 of M4_function_n.
-C The carry flag state left by testl is not used on this entry point: both
-C paths below reload it from PARAM_CARRY.
- decl %ecx
- movl %ecx,%edx
- shrl $3,%ecx
- andl $7,%edx
- testl %ecx,%ecx C zero carry flag
- jz L(endgo)
-
-C At least one 8-limb pass.  The tail count is parked on the stack because
-C the loop uses edx for data; it is popped again after the loop.  Only
-C bit 0 of PARAM_CARRY reaches the carry flag.
-C NOTE(review): LF is defined outside this file; presumably it names a
-C label local to the given function -- confirm.
- pushl %edx
-FRAME_pushl()
- movl PARAM_CARRY,%eax
- shrl $1,%eax C shift bit 0 into carry
- jmp LF(M4_function_n,oop)
-
-C No 8-limb pass (size-1 < 8).  Nothing extra was pushed on this path, so
-C FRAME is set back to 16 before PARAM_CARRY is read.  Then load the carry
-C flag and continue at the tail code of M4_function_n.
-L(endgo):
-deflit(`FRAME',16)
- movl PARAM_CARRY,%eax
- shrl $1,%eax C shift bit 0 into carry
- jmp LF(M4_function_n,end)
-
-EPILOGUE()
-
-
-C M4_function_n -- combine the size-limb operands at src1 and src2 limb by
-C limb with M4_inst (adcl or sbbl), propagating the carry flag from each
-C limb to the next, and store the results at dst.  For sbbl the src2 limb
-C is subtracted from the src1 limb.  Returns the final carry or borrow,
-C 0 or 1, in eax.
-C
-C Register use:
-C   edi = dst, esi = src1, ebp = src2
-C   ebx = src2 limb fetched ahead of its use
-C   ecx = (size-1) >> 3, count of 8-limb loop passes
-C   edx = (size-1) & 7, count of single-limb tail passes (kept on the
-C         stack while the 8-limb loop runs)
-C   eax, edx = data registers inside the 8-limb loop
-C
-C Carry discipline: between two M4_inst instructions only movl, leal, decl,
-C incl, pushl, popl and jumps appear; the code relies on none of these
-C changing the carry flag.
-C NOTE(review): the interleaving of loads, M4_inst and stores is presumably
-C scheduled for the P5 figure quoted in the file header -- do not reorder
-C without measuring.
-C NOTE(review): (%ebp) is read unconditionally, so size >= 1 appears to be
-C assumed -- confirm against the callers.
- ALIGN(8)
-PROLOGUE(M4_function_n)
-
-C Saved here, restored in reverse order before ret.
- pushl %edi
- pushl %esi
- pushl %ebx
- pushl %ebp
-C Four pushes make 16 bytes.  NOTE(review): FRAME presumably lets the
-C PARAM_ names compensate for the moved stack pointer -- confirm.
-deflit(`FRAME',16)
-
- movl PARAM_DST,%edi
- movl PARAM_SRC1,%esi
- movl PARAM_SRC2,%ebp
- movl PARAM_SIZE,%ecx
-
-C Fetch the first src2 limb ahead of time.
- movl (%ebp),%ebx
-
-C Split size-1 into 8-limb passes (ecx) and leftover single limbs (edx).
-C The one limb not counted here is handled at end2.  testl also clears the
-C carry flag, so the first M4_inst starts with no carry in.
- decl %ecx
- movl %ecx,%edx
- shrl $3,%ecx
- andl $7,%edx
- testl %ecx,%ecx C zero carry flag
- jz L(end)
-C Park the tail count; edx is used for data inside the loop.
- pushl %edx
-FRAME_pushl()
-
-C Main loop: 8 limbs (32 bytes) per pass.  M4_function_nc also enters here.
-C The value loaded from 28(%edi) is discarded (eax is reloaded at label 1).
-C edi is advanced first, so the stores below use offsets -32 .. -4.
- ALIGN(8)
-L(oop): movl 28(%edi),%eax C fetch destination cache line
- leal 32(%edi),%edi
-
-C Limbs 0 and 1 of this pass.  Each src2 limb is loaded into ebx just after
-C the M4_inst that consumed the previous one.
-L(1): movl (%esi),%eax
- movl 4(%esi),%edx
- M4_inst %ebx,%eax
- movl 4(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 8(%ebp),%ebx
- movl %eax,-32(%edi)
- movl %edx,-28(%edi)
-
-C Limbs 2 and 3.
-L(2): movl 8(%esi),%eax
- movl 12(%esi),%edx
- M4_inst %ebx,%eax
- movl 12(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 16(%ebp),%ebx
- movl %eax,-24(%edi)
- movl %edx,-20(%edi)
-
-C Limbs 4 and 5.
-L(3): movl 16(%esi),%eax
- movl 20(%esi),%edx
- M4_inst %ebx,%eax
- movl 20(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 24(%ebp),%ebx
- movl %eax,-16(%edi)
- movl %edx,-12(%edi)
-
-C Limbs 6 and 7.  The load from 32(%ebp) fetches the src2 limb used first
-C by the next pass, or by the tail code after the final pass.
-L(4): movl 24(%esi),%eax
- movl 28(%esi),%edx
- M4_inst %ebx,%eax
- movl 28(%ebp),%ebx
- M4_inst %ebx,%edx
- movl 32(%ebp),%ebx
- movl %eax,-8(%edi)
- movl %edx,-4(%edi)
-
-C Advance the source pointers and count the pass with leal and decl, which
-C keep the carry flag intact for the next M4_inst.
- leal 32(%esi),%esi
- leal 32(%ebp),%ebp
- decl %ecx
- jnz L(oop)
-
-C Recover the tail count parked before the loop.
- popl %edx
-FRAME_popl()
-C Tail.  edx is in 0..7 here.  decl sets the sign flag only when edx was 0,
-C in which case the single-limb loop is skipped; otherwise incl puts the
-C count back.  M4_function_nc also enters here.
-L(end):
- decl %edx C test %edx w/o clobbering carry
- js L(end2)
- incl %edx
-C One limb per pass: advance dst, combine the src1 limb with the src2 limb
-C already in ebx, fetch the next src2 limb, store, advance the sources.
-L(oop2):
- leal 4(%edi),%edi
- movl (%esi),%eax
- M4_inst %ebx,%eax
- movl 4(%ebp),%ebx
- movl %eax,-4(%edi)
- leal 4(%esi),%esi
- leal 4(%ebp),%ebp
- decl %edx
- jnz L(oop2)
-C Final limb; its src2 limb is already in ebx.
-L(end2):
- movl (%esi),%eax
- M4_inst %ebx,%eax
- movl %eax,(%edi)
-
-C Turn the carry flag into the return value: sbbl gives 0 or -1, negl makes
-C that 0 or 1.
- sbbl %eax,%eax
- negl %eax
-
- popl %ebp
- popl %ebx
- popl %esi
- popl %edi
- ret
-
-EPILOGUE()