dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction. dnl dnl P5: 2.375 cycles/limb dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software dnl Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or dnl modify it under the terms of the GNU Lesser General Public License as dnl published by the Free Software Foundation; either version 2.1 of the dnl License, or (at your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl Lesser General Public License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. include(`../config.m4') ifdef(`OPERATION_add_n',` define(M4_inst, adcl) define(M4_function_n, mpn_add_n) define(M4_function_nc, mpn_add_nc) ',`ifdef(`OPERATION_sub_n',` define(M4_inst, sbbl) define(M4_function_n, mpn_sub_n) define(M4_function_nc, mpn_sub_nc) ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n ')')') MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, C mp_size_t size); C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, C mp_size_t size, mp_limb_t carry); defframe(PARAM_CARRY,20) defframe(PARAM_SIZE, 16) defframe(PARAM_SRC2, 12) defframe(PARAM_SRC1, 8) defframe(PARAM_DST, 4) .text ALIGN(8) PROLOGUE(M4_function_nc) pushl %edi pushl %esi pushl %ebx pushl %ebp deflit(`FRAME',16) movl PARAM_DST,%edi movl PARAM_SRC1,%esi movl PARAM_SRC2,%ebp movl PARAM_SIZE,%ecx movl (%ebp),%ebx decl %ecx movl %ecx,%edx shrl $3,%ecx andl $7,%edx testl %ecx,%ecx C zero carry flag jz L(endgo) pushl %edx FRAME_pushl() movl PARAM_CARRY,%eax shrl $1,%eax C shift bit 0 into carry jmp LF(M4_function_n,oop) L(endgo): deflit(`FRAME',16) movl PARAM_CARRY,%eax shrl $1,%eax C shift bit 0 into carry jmp LF(M4_function_n,end) EPILOGUE() ALIGN(8) PROLOGUE(M4_function_n) pushl %edi pushl %esi pushl %ebx pushl %ebp deflit(`FRAME',16) movl PARAM_DST,%edi movl PARAM_SRC1,%esi movl PARAM_SRC2,%ebp movl PARAM_SIZE,%ecx movl (%ebp),%ebx decl %ecx movl %ecx,%edx shrl $3,%ecx andl $7,%edx testl %ecx,%ecx C zero carry flag jz L(end) pushl %edx FRAME_pushl() ALIGN(8) L(oop): movl 28(%edi),%eax C fetch destination cache line leal 32(%edi),%edi L(1): movl (%esi),%eax movl 4(%esi),%edx M4_inst %ebx,%eax movl 4(%ebp),%ebx M4_inst %ebx,%edx movl 8(%ebp),%ebx movl %eax,-32(%edi) movl %edx,-28(%edi) L(2): movl 8(%esi),%eax movl 12(%esi),%edx M4_inst %ebx,%eax movl 12(%ebp),%ebx M4_inst %ebx,%edx movl 16(%ebp),%ebx movl %eax,-24(%edi) movl %edx,-20(%edi) L(3): movl 16(%esi),%eax movl 20(%esi),%edx M4_inst %ebx,%eax movl 20(%ebp),%ebx M4_inst %ebx,%edx movl 24(%ebp),%ebx movl %eax,-16(%edi) movl %edx,-12(%edi) L(4): movl 24(%esi),%eax movl 28(%esi),%edx M4_inst %ebx,%eax movl 28(%ebp),%ebx M4_inst %ebx,%edx movl 32(%ebp),%ebx movl %eax,-8(%edi) movl %edx,-4(%edi) leal 32(%esi),%esi leal 32(%ebp),%ebp decl %ecx jnz L(oop) popl %edx FRAME_popl() L(end): decl %edx C test %edx w/o clobbering carry js L(end2) incl %edx L(oop2): leal 4(%edi),%edi movl (%esi),%eax M4_inst %ebx,%eax movl 4(%ebp),%ebx movl %eax,-4(%edi) leal 4(%esi),%esi leal 4(%ebp),%ebp decl %edx jnz L(oop2) L(end2): movl (%esi),%eax M4_inst %ebx,%eax movl %eax,(%edi) sbbl %eax,%eax negl %eax popl %ebp popl %ebx popl %esi popl %edi ret EPILOGUE()