| .text |
| |
| .set noat |
| .set noreorder |
| |
| .align 5 |
| .globl bn_mul_mont |
| .ent bn_mul_mont |
| # bn_mul_mont: exported entry point; a size gate in front of bn_mul_mont_internal. |
| # |
| # In: $4 = rp, $5 = ap, $6 = bp, $7 = np (left untouched for the worker) |
| # 16($29) = fifth argument, a pointer the worker dereferences once |
| # (presumably the Montgomery constant n0 - confirm against the C prototype) |
| # 20($29) = num, operand length in 32-bit words |
| # Out: $2 = 0 when num < 4 or num >= 17; nothing is computed in that case |
| # (presumably the caller then uses a generic routine - confirm). |
| # Otherwise control passes to bn_mul_mont_internal, which sets $2 itself. |
| # |
| # The file is assembled with .set noat/.set noreorder, so $1 is used as an |
| # explicit scratch register and the instruction following every branch or |
| # jump is its delay slot. |
| bn_mul_mont: |
| lw $8,16($29) # $8 = fifth argument (pointer) |
| lw $9,20($29) # $9 = num |
| slt $1,$9,4 |
| bnez $1,1f # num < 4: refuse |
| li $2,0 # delay slot: return value 0 for both refusal paths |
| slt $1,$9,17 # on in-order CPU |
| # Plain bnez rather than the branch-likely bnezl: branch-likely encodings are |
| # deprecated and do not exist in MIPS Release 6. The delay slot holds a nop, |
| # so executing it on the fall-through path as well changes nothing. |
| bnez $1,bn_mul_mont_internal # 4 <= num < 17: hand over to the worker |
| nop # delay slot |
| 1: jr $31 |
| li $4,0 # delay slot |
| .end bn_mul_mont |
| |
| .align 5 |
| .ent bn_mul_mont_internal |
| # bn_mul_mont_internal: word-serial Montgomery multiplication worker. |
| # |
| # Not exported. Entered by a branch from bn_mul_mont with: |
| # $4 = rp (result), $5 = ap, $6 = bp, $7 = np, |
| # $8 = pointer whose target word is loaded into $8 (presumably n0 - confirm), |
| # $9 = num in 32-bit words (the caller admits 4..16 only). |
| # Returns $2 = 1 (and $4 = 1) after writing num words to rp. |
| # |
| # Register roles inside the routine: |
| # $13 current bp[i] $23 per-row multiplier m = lo(sum) * $8 |
| # $12 ap[j] $14 np[j] |
| # $16/$17 lo/hi of pending ap[j]*bp[i] $18/$19 lo/hi of pending np[j]*m |
| # $10/$11 lo/hi of the ap*bp chain $24/$25 lo/hi of the np*m chain |
| # $15 running pointer into tp $20 tp[j] word, later &tp[num] |
| # $21 i (byte offset into bp) $22 j (byte offset into ap/np) |
| # $29 base of tp (stack scratch) $30 frame base for the epilogue |
| # $1, $2 carry/condition scratch |
| # |
| # Stack: a 14-word frame holding $30 and $16-$23, then num+2 words of scratch |
| # (tp) below it, with $29 rounded down to a 4096-byte boundary. |
| bn_mul_mont_internal: |
| .frame $30,14*4,$31 |
| .mask 0x40000000|16711680,-4 |
| sub $29,14*4 |
| sw $30,(14-1)*4($29) |
| sw $23,(14-2)*4($29) |
| sw $22,(14-3)*4($29) |
| sw $21,(14-4)*4($29) |
| sw $20,(14-5)*4($29) |
| sw $19,(14-6)*4($29) |
| sw $18,(14-7)*4($29) |
| sw $17,(14-8)*4($29) |
| sw $16,(14-9)*4($29) |
| move $30,$29 # remember frame base; $29 is re-aimed at tp below |
| |
| .set reorder |
| lw $8,0($8) # replace the pointer by the word it addresses |
| lw $13,0($6) # bp[0] |
| lw $12,0($5) # ap[0] |
| lw $14,0($7) # np[0] |
| |
| sub $29,2*4 # place for two extra words |
| sll $9,2 # num is a byte count from here on |
| li $1,-4096 |
| sub $29,$9 |
| and $29,$1 # round tp down to a 4096-byte boundary |
| |
| # ---- first row (bp[0]): nothing has been accumulated in tp yet ---- |
| multu $12,$13 # ap[0]*bp[0] |
| lw $16,4($5) # ap[1] |
| lw $18,4($7) # np[1] |
| mflo $10 |
| mfhi $11 |
| multu $10,$8 |
| mflo $23 # m = lo(ap[0]*bp[0]) * $8 |
| |
| multu $16,$13 # ap[1]*bp[0] |
| mflo $16 |
| mfhi $17 |
| |
| multu $14,$23 # np[0]*m |
| mflo $24 |
| mfhi $25 |
| multu $18,$23 # np[1]*m |
| addu $24,$10 # low word of the sum is never stored, |
| sltu $1,$24,$10 # only its carry out is kept |
| addu $25,$1 |
| mflo $18 |
| mfhi $19 |
| |
| move $15,$29 |
| li $22,2*4 # j starts at word 2 |
| .align 4 |
| .L1st: |
| .set noreorder |
| add $12,$5,$22 |
| add $14,$7,$22 |
| lw $12,($12) # ap[j] |
| lw $14,($14) # np[j] |
| |
| multu $12,$13 # start ap[j]*bp[0]; fold the pending products meanwhile |
| addu $10,$16,$11 |
| addu $24,$18,$25 |
| sltu $1,$10,$11 |
| sltu $2,$24,$25 |
| addu $11,$17,$1 |
| addu $25,$19,$2 |
| mflo $16 |
| mfhi $17 |
| |
| addu $24,$10 # combine the two chains |
| sltu $1,$24,$10 |
| multu $14,$23 # start np[j]*m |
| addu $25,$1 |
| addu $22,4 |
| sw $24,($15) # finished word goes to *tp |
| sltu $2,$22,$9 # j < num ? |
| mflo $18 |
| mfhi $19 |
| |
| bnez $2,.L1st |
| add $15,4 # delay slot: advance tp |
| .set reorder |
| |
| # drain the last pending products of the first row |
| addu $10,$16,$11 |
| sltu $1,$10,$11 |
| addu $11,$17,$1 |
| |
| addu $24,$18,$25 |
| sltu $2,$24,$25 |
| addu $25,$19,$2 |
| addu $24,$10 |
| sltu $1,$24,$10 |
| addu $25,$1 |
| |
| sw $24,($15) |
| |
| addu $25,$11 |
| sltu $1,$25,$11 |
| sw $25,4($15) |
| sw $1,2*4($15) # carry out becomes the extra top word |
| |
| # ---- remaining rows: bp[1] .. bp[num-1], accumulating into tp ---- |
| li $21,4 |
| .align 4 |
| .Louter: |
| add $13,$6,$21 |
| lw $13,($13) # bp[i] |
| lw $12,($5) # ap[0] |
| lw $16,4($5) # ap[1] |
| lw $20,($29) # tp[0] |
| |
| multu $12,$13 # ap[0]*bp[i] |
| lw $14,($7) # np[0] |
| lw $18,4($7) # np[1] |
| mflo $10 |
| mfhi $11 |
| addu $10,$20 # + tp[0] |
| multu $10,$8 |
| sltu $1,$10,$20 |
| addu $11,$1 |
| mflo $23 # m = lo(ap[0]*bp[i] + tp[0]) * $8 |
| |
| multu $16,$13 # ap[1]*bp[i] |
| mflo $16 |
| mfhi $17 |
| |
| multu $14,$23 # np[0]*m |
| mflo $24 |
| mfhi $25 |
| |
| multu $18,$23 # np[1]*m |
| addu $24,$10 # low word discarded, carry kept |
| sltu $1,$24,$10 |
| addu $25,$1 |
| mflo $18 |
| mfhi $19 |
| |
| move $15,$29 |
| li $22,2*4 |
| lw $20,4($15) # tp[1] |
| .align 4 |
| .Linner: |
| .set noreorder |
| add $12,$5,$22 |
| add $14,$7,$22 |
| lw $12,($12) # ap[j] |
| lw $14,($14) # np[j] |
| |
| multu $12,$13 # start ap[j]*bp[i]; fold the pending products meanwhile |
| addu $10,$16,$11 |
| addu $24,$18,$25 |
| sltu $1,$10,$11 |
| sltu $2,$24,$25 |
| addu $11,$17,$1 |
| addu $25,$19,$2 |
| mflo $16 |
| mfhi $17 |
| |
| addu $10,$20 # add the previously accumulated tp word |
| addu $22,4 |
| multu $14,$23 # start np[j]*m |
| sltu $1,$10,$20 |
| addu $24,$10 |
| addu $11,$1 |
| sltu $2,$24,$10 |
| lw $20,2*4($15) # prefetch the next tp word |
| addu $25,$2 |
| sltu $1,$22,$9 # j < num ? |
| mflo $18 |
| mfhi $19 |
| sw $24,($15) |
| bnez $1,.Linner |
| add $15,4 # delay slot: advance tp |
| .set reorder |
| |
| # drain the last pending products of this row |
| addu $10,$16,$11 |
| sltu $1,$10,$11 |
| addu $11,$17,$1 |
| addu $10,$20 |
| sltu $2,$10,$20 |
| addu $11,$2 |
| |
| lw $20,2*4($15) # previous top word |
| addu $24,$18,$25 |
| sltu $1,$24,$25 |
| addu $25,$19,$1 |
| addu $24,$10 |
| sltu $2,$24,$10 |
| addu $25,$2 |
| sw $24,($15) |
| |
| addu $24,$25,$11 |
| sltu $25,$24,$11 |
| addu $24,$20 |
| sltu $1,$24,$20 |
| addu $25,$1 # $25 = new top (overflow) word |
| sw $24,4($15) |
| sw $25,2*4($15) |
| |
| addu $21,4 |
| sltu $2,$21,$9 # i < num ? |
| bnez $2,.Louter |
| |
| # ---- final reduction: rp = tp - np, tracking the borrow in $11 ---- |
| .set noreorder |
| add $20,$29,$9 # &tp[num] |
| move $15,$29 |
| li $11,0 # clear borrow bit |
| |
| .align 4 |
| .Lsub: lw $10,($15) |
| lw $24,($7) |
| add $15,4 |
| add $7,4 |
| subu $24,$10,$24 # tp[i]-np[i] |
| sgtu $1,$24,$10 # borrow out of tp[i]-np[i] |
| subu $10,$24,$11 # minus incoming borrow |
| sgtu $11,$10,$24 # borrow out of that step |
| sw $10,($4) |
| or $11,$1 # outgoing borrow |
| sltu $1,$15,$20 |
| bnez $1,.Lsub |
| add $4,4 # delay slot: advance rp |
| |
| subu $11,$25,$11 # handle upmost overflow bit |
| move $15,$29 |
| sub $4,$9 # restore rp |
| not $25,$11 |
| |
| # Select between tp (subtraction underflowed) and the difference already in |
| # rp. $11 is the selection mask and $25 its complement; the mask is expected |
| # to be 0 or all-ones. Both words are loaded on every iteration and the mask |
| # is applied to the data rather than to a source pointer, so the addresses |
| # touched do not depend on the borrow. tp is wiped as it is consumed. |
| .align 4 |
| .Lcopy: lw $14,($15) # tp[i] |
| lw $12,($4) # rp[i] = tp[i]-np[i] |
| sw $0,($15) # wipe tp[i] |
| add $15,4 |
| and $14,$11 |
| and $12,$25 |
| or $12,$14 # borrow?tp[i]:rp[i] |
| sltu $1,$15,$20 |
| sw $12,($4) |
| bnez $1,.Lcopy |
| add $4,4 # delay slot: advance rp |
| |
| li $4,1 |
| li $2,1 # return value 1 |
| |
| # ---- epilogue: drop tp, restore callee-saved registers ---- |
| .set noreorder |
| move $29,$30 |
| lw $30,(14-1)*4($29) |
| lw $23,(14-2)*4($29) |
| lw $22,(14-3)*4($29) |
| lw $21,(14-4)*4($29) |
| lw $20,(14-5)*4($29) |
| lw $19,(14-6)*4($29) |
| lw $18,(14-7)*4($29) |
| lw $17,(14-8)*4($29) |
| lw $16,(14-9)*4($29) |
| jr $31 |
| add $29,14*4 # delay slot: release the frame |
| .end bn_mul_mont_internal |
| .rdata |
| .asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>" |