/* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract
 *                       the result from a second limb vector.
 *
 *      Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */

#include "sysdep.h"
#include "asm-syntax.h"


/*

# INPUT PARAMETERS
# res_ptr       r3
# s1_ptr        r4
# size          r5
# s2_limb       r6

# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result.  We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set.  We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work).  Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
 */

/*
 * What the routine does, as read from the code below:
 *
 *   For each of the `size' limbs, the limb at s1_ptr is multiplied by
 *   s2_limb, the product (plus the running carry limb and borrow) is
 *   subtracted from the limb at res_ptr, and the difference is stored
 *   back at res_ptr.  At exit r3 holds the final carry limb plus the
 *   pending borrow (presumably the function result under the calling
 *   convention -- confirm against the C prototype).
 *
 *   CTR is loaded from `size' and is decremented before it is tested
 *   (bdz/bdn), so `size' is expected to be at least 1.
 *
 * Structure:
 *
 *   - The first limb is handled before the loops.
 *   - The sign of s2_limb selects one of two loops: Lploop (s2_limb >= 0)
 *     or Lnloop (s2_limb < 0, where the s1 limb is always added to the
 *     high product word as compensation).
 *   - Each loop is unrolled twice; the two halves alternate between r10
 *     and r9 as the high word / carry limb.  Lend0 is the exit taken when
 *     the live carry limb is in r10, Lend when it is in r9.
 *
 * Register usage:
 *
 *   r0       current s1 limb (in Lploop then reused for the low product word)
 *   r3       res_ptr biased by -4; receives the result at exit
 *   r4       s1_ptr, advanced by the update-form loads
 *   r6       s2_limb
 *   r7       scratch: sign mask, low product word (Lnloop), res limb
 *   r8       difference limb waiting to be stored
 *   r9, r10  high product word / carry limb
 *   r11      low product word (plus carry-in inside the loops), then junk
 *   CTR      remaining limb count
 *   cr0      sign of s2_limb (first limb) or of the s1 limb (loops)
 *   CA       borrow handed from one limb to the next
 */

        .toc
        .csect ._gcry_mpih_submul_1[PR]
        .align 2
        .globl _gcry_mpih_submul_1
        .globl ._gcry_mpih_submul_1
        .csect _gcry_mpih_submul_1[DS]
_gcry_mpih_submul_1:
        .long ._gcry_mpih_submul_1[PR], TOC[tc0], 0     # entry address, TOC anchor, zero word
        .csect ._gcry_mpih_submul_1[PR]
._gcry_mpih_submul_1:

        cal     3,-4(3)         # bias res_ptr by -4 so every access can use 4(3)
        l       0,0(4)          # r0 = first s1 limb
        cmpi    0,6,0           # cr0 = sign of s2_limb, consumed by blt below
        mtctr   5               # CTR = size
        mul     9,0,6           # r9 = high word of signed product, MQ = low word
        srai    7,0,31          # r7 = all ones if the s1 limb is negative, else 0
        and     7,7,6           # r7 = s2_limb if the s1 limb is negative, else 0
        mfmq    11              # r11 = low word of product
        cax     9,9,7           # adjust high limb for negative limb from s1
        l       7,4(3)          # r7 = res_limb
        sf      8,11,7          # r8 = res_limb - low limb
        a       11,8,11         # invert cy (r11 is junk)
        blt     Lneg            # s2_limb is negative: take the compensating path
Lpos:   bdz     Lend            # single limb: carry limb is already in r9

Lploop: lu      0,4(4)          # r0 = next s1 limb, s1_ptr advances
        stu     8,4(3)          # store previous difference limb, res_ptr advances
        cmpi    0,0,0           # cr0 = sign of the s1 limb, consumed by bge below
        mul     10,0,6          # r10 = high word, MQ = low word
        mfmq    0               # r0 = low word (the s1 limb is no longer needed)
        ae      11,0,9          # low limb + old_cy_limb + old cy
        l       7,4(3)          # r7 = res_limb
        aze     10,10           # propagate cy to new cy_limb
        sf      8,11,7          # r8 = res_limb - (low limb + carry-in)
        a       11,8,11         # invert cy (r11 is junk)
        bge     Lp0             # s1 limb not negative: no adjustment needed
        cax     10,10,6         # adjust high limb for negative limb from s1
Lp0:    bdz     Lend0           # done with the carry limb in r10
        lu      0,4(4)          # second half of the unrolled body: r9 and r10 swap roles
        stu     8,4(3)
        cmpi    0,0,0
        mul     9,0,6
        mfmq    0
        ae      11,0,10         # low limb + old_cy_limb + old cy
        l       7,4(3)
        aze     9,9             # propagate cy to new cy_limb
        sf      8,11,7          # r8 = res_limb - (low limb + carry-in)
        a       11,8,11         # invert cy (r11 is junk)
        bge     Lp1
        cax     9,9,6           # adjust high limb for negative limb from s1
Lp1:    bdn     Lploop          # loop while limbs remain

        b       Lend            # done with the carry limb in r9

Lneg:   cax     9,9,0           # s2_limb is negative: add the s1 limb to the high limb
        bdz     Lend            # single limb: carry limb is already in r9
Lnloop: lu      0,4(4)          # r0 = next s1 limb, s1_ptr advances
        stu     8,4(3)          # store previous difference limb, res_ptr advances
        cmpi    0,0,0           # cr0 = sign of the s1 limb, consumed by bge below
        mul     10,0,6          # r10 = high word, MQ = low word
        mfmq    7               # r7 = low word (r0 keeps the s1 limb for the ae below)
        ae      11,7,9          # low limb + old_cy_limb + old cy
        l       7,4(3)          # r7 = res_limb
        ae      10,10,0         # propagate cy to new cy_limb
        sf      8,11,7          # r8 = res_limb - (low limb + carry-in)
        a       11,8,11         # invert cy (r11 is junk)
        bge     Ln0             # s1 limb not negative: no further adjustment
        cax     10,10,6         # adjust high limb for negative limb from s1
Ln0:    bdz     Lend0           # done with the carry limb in r10
        lu      0,4(4)          # second half of the unrolled body: r9 and r10 swap roles
        stu     8,4(3)
        cmpi    0,0,0
        mul     9,0,6
        mfmq    7
        ae      11,7,10         # low limb + old_cy_limb + old cy
        l       7,4(3)
        ae      9,9,0           # propagate cy to new cy_limb
        sf      8,11,7          # r8 = res_limb - (low limb + carry-in)
        a       11,8,11         # invert cy (r11 is junk)
        bge     Ln1
        cax     9,9,6           # adjust high limb for negative limb from s1
Ln1:    bdn     Lnloop          # loop while limbs remain
        b       Lend            # done with the carry limb in r9

Lend0:  cal     9,0(10)         # move the carry limb from r10 into r9
Lend:   st      8,4(3)          # store the last difference limb
        aze     3,9             # r3 = carry limb + pending borrow
        br