# blob: 32ecee55a5dae016e6f497c48302124c41756df6 [file] [log] [blame]
.text
# $1 ($at) is used explicitly as a scratch register throughout this file,
# and branch delay slots are filled by hand.
.set noat
.set noreorder
.align 5
.globl bn_mul_mont
.ent bn_mul_mont
#-----------------------------------------------------------------------
# bn_mul_mont -- public entry point; size gate in front of
#                bn_mul_mont_internal.
#
# In:   $4..$7    first four arguments, passed through untouched
#                 (the internal routine's own comments name them
#                 rp, ap, bp, np)
#       16($29)   fifth argument; a pointer (the internal routine
#                 dereferences it) -- presumably n0, confirm against
#                 the C prototype
#       20($29)   sixth argument; word count "num"
# Out:  $2 = 0    when num < 4 or num >= 17 (signed compare); nothing
#                 else is done in that case
#       otherwise control transfers to bn_mul_mont_internal with
#       $8 = fifth argument, $9 = num, and $2 = 0.
# Clobbers: $1, $2, $4 (on the early-return path), $8, $9
#-----------------------------------------------------------------------
bn_mul_mont:
	lw	$8,16($29)
	lw	$9,20($29)
	slt	$1,$9,4
	bnez	$1,1f			# num < 4: bail out
	li	$2,0			# delay slot: return value 0 on every path
	slt	$1,$9,17 # on in-order CPU
	# Plain bnez instead of the deprecated branch-likely bnezl: the
	# delay slot is a nop, so annulling it on fall-through bought
	# nothing, and branch-likely opcodes do not exist on MIPS Release 6.
	bnez	$1,bn_mul_mont_internal
	nop
1:	jr	$31
	li	$4,0			# delay slot: also clears $4 before returning
.end bn_mul_mont
#-----------------------------------------------------------------------
# bn_mul_mont_internal -- word-by-word Montgomery multiplication body.
# Reached only by a branch from bn_mul_mont (not exported via .globl).
#
# Register roles on entry, as established by bn_mul_mont and by the
# comments already present in this routine:
#   $4  rp   result vector (written in .Lsub / .Lcopy)
#   $5  ap   first operand vector
#   $6  bp   second operand vector
#   $7  np   modulus vector
#   $8  pointer to a single word; replaced by that word on entry
#       (presumably n0 -- confirm against the C prototype)
#   $9  num  word count; converted to a byte count (num*4) on entry
#
# Working registers:
#   $10/$11  low / high accumulator of the ap[j]*bp[i] chain
#   $24/$25  low / high accumulator of the np[j]*m chain
#   $12      ap[j]            $14  np[j]
#   $13      bp[i]            $23  m = low word of (lo * $8)
#   $16/$17  LO/HI of the most recent ap[]*bp[i] product
#   $18/$19  LO/HI of the most recent np[]*m product
#   $15      tp cursor        $20  previous tp word / end-of-tp pointer
#   $21      outer byte index i     $22  inner byte index j
#   $1, $2   scratch carry flags
#
# Out: $2 = 1 (and $4 = 1). Callee-saved $16-$23 and $30 are preserved.
# Stack: 14-word frame plus a temporary vector tp of num+2 words,
#        rounded down to a 4096-byte boundary; released via $30.
#-----------------------------------------------------------------------
.align 5
.ent bn_mul_mont_internal
bn_mul_mont_internal:
.frame $30,14*4,$31
# Mask: bit 30 ($30) plus 16711680 = 0x00FF0000 ($16..$23).
.mask 0x40000000|16711680,-4
# ---- prologue (still under .set noreorder from the top of the file) ----
sub $29,14*4
sw $30,(14-1)*4($29)
sw $23,(14-2)*4($29)
sw $22,(14-3)*4($29)
sw $21,(14-4)*4($29)
sw $20,(14-5)*4($29)
sw $19,(14-6)*4($29)
sw $18,(14-7)*4($29)
sw $17,(14-8)*4($29)
sw $16,(14-9)*4($29)
# $30 remembers the frame base so the epilogue can undo the
# variable-size allocation made below.
move $30,$29
.set reorder
# Replace the pointer in $8 by the word it points to.
lw $8,0($8)
lw $13,0($6) # bp[0]
lw $12,0($5) # ap[0]
lw $14,0($7) # np[0]
# Carve tp out of the stack: 2 extra words + num words, then round
# $29 down to a multiple of 4096.
sub $29,2*4 # place for two extra words
# From here on $9 is num in bytes.
sll $9,2
li $1,-4096
sub $29,$9
and $29,$1
# ---- first pass (bp[0]): no previous tp contents to add ----
# ap[0]*bp[0]
multu $12,$13
# ap[1] and np[1] are fetched while the multiply is in flight.
lw $16,4($5)
lw $18,4($7)
mflo $10
mfhi $11
# m = low word of (lo0 * $8)
multu $10,$8
mflo $23
# ap[1]*bp[0]
multu $16,$13
mflo $16
mfhi $17
# np[0]*m
multu $14,$23
mflo $24
mfhi $25
# np[1]*m is started; meanwhile fold lo0 into the np chain with carry.
multu $18,$23
addu $24,$10
sltu $1,$24,$10
addu $25,$1
mflo $18
mfhi $19
# tp cursor starts at the base of the temporary vector; j starts at
# word 2 (byte offset 8) because words 0 and 1 were handled above.
move $15,$29
li $22,2*4
.align 4
.L1st:
.set noreorder
# Each iteration loads ap[j]/np[j], starts their products, and while the
# multiplier is busy finishes the carry propagation for word j-1.
add $12,$5,$22
add $14,$7,$22
lw $12,($12)
lw $14,($14)
# ap[j]*bp[0]
multu $12,$13
# lo0 = alo + hi0 ; lo1 = nlo + hi1, each with carry into the new hi.
addu $10,$16,$11
addu $24,$18,$25
sltu $1,$10,$11
sltu $2,$24,$25
addu $11,$17,$1
addu $25,$19,$2
mflo $16
mfhi $17
# lo1 += lo0, carry into hi1.
addu $24,$10
sltu $1,$24,$10
# np[j]*m
multu $14,$23
addu $25,$1
addu $22,4
sw $24,($15)
# Loop while j < num (both in bytes).
sltu $2,$22,$9
mflo $18
mfhi $19
bnez $2,.L1st
# Delay slot: advance the tp cursor.
add $15,4
.set reorder
# Tail of the first pass: consume the last pair of products.
addu $10,$16,$11
sltu $1,$10,$11
addu $11,$17,$1
addu $24,$18,$25
sltu $2,$24,$25
addu $25,$19,$2
addu $24,$10
sltu $1,$24,$10
addu $25,$1
sw $24,($15)
# Combine both high words; store the sum and its carry-out as the two
# words following the one just written.
addu $25,$11
sltu $1,$25,$11
sw $25,4($15)
sw $1,2*4($15)
# Outer index i starts at word 1 (byte offset 4).
li $21,4
.align 4
.Louter:
# ---- outer loop: one pass per bp[i], accumulating onto tp ----
add $13,$6,$21
lw $13,($13)
lw $12,($5)
lw $16,4($5)
# $20 = tp[0] from the previous pass.
lw $20,($29)
# ap[0]*bp[i]
multu $12,$13
lw $14,($7)
lw $18,4($7)
mflo $10
mfhi $11
# lo0 += tp[0]; the multiply for m is issued before the carry is
# resolved because only the low word feeds it.
addu $10,$20
multu $10,$8
sltu $1,$10,$20
addu $11,$1
mflo $23
# ap[1]*bp[i]
multu $16,$13
mflo $16
mfhi $17
# np[0]*m
multu $14,$23
mflo $24
mfhi $25
# np[1]*m
multu $18,$23
addu $24,$10
sltu $1,$24,$10
addu $25,$1
mflo $18
mfhi $19
move $15,$29
li $22,2*4
# $20 = tp[1], the next previous-pass word to fold in.
lw $20,4($15)
.align 4
.Linner:
.set noreorder
# Same schedule as .L1st, plus the addition of the previous-pass word
# held in $20.
add $12,$5,$22
add $14,$7,$22
lw $12,($12)
lw $14,($14)
# ap[j]*bp[i]
multu $12,$13
addu $10,$16,$11
addu $24,$18,$25
sltu $1,$10,$11
sltu $2,$24,$25
addu $11,$17,$1
addu $25,$19,$2
mflo $16
mfhi $17
# lo0 += previous tp word, carry into hi0.
addu $10,$20
addu $22,4
# np[j]*m
multu $14,$23
sltu $1,$10,$20
addu $24,$10
addu $11,$1
sltu $2,$24,$10
# Prefetch the previous-pass word needed by the next iteration.
lw $20,2*4($15)
addu $25,$2
# Loop while j < num (both in bytes).
sltu $1,$22,$9
mflo $18
mfhi $19
sw $24,($15)
bnez $1,.Linner
# Delay slot: advance the tp cursor.
add $15,4
.set reorder
# Tail of the pass: last pair of products plus the two remaining
# previous-pass words.
addu $10,$16,$11
sltu $1,$10,$11
addu $11,$17,$1
addu $10,$20
sltu $2,$10,$20
addu $11,$2
lw $20,2*4($15)
addu $24,$18,$25
sltu $1,$24,$25
addu $25,$19,$1
addu $24,$10
sltu $2,$24,$10
addu $25,$2
sw $24,($15)
# hi1 + hi0 + previous top word; $25 ends up holding the carry-out,
# which is stored as the top word and is still live in $25 after the
# final pass (it is read again right after .Lsub).
addu $24,$25,$11
sltu $25,$24,$11
addu $24,$20
sltu $1,$24,$20
addu $25,$1
sw $24,4($15)
sw $25,2*4($15)
addu $21,4
# Next bp word while i < num (both in bytes).
sltu $2,$21,$9
bnez $2,.Louter
.set noreorder
# ---- final reduction: rp = tp - np, then pick tp or rp ----
add $20,$29,$9 # &tp[num]
move $15,$29
move $5,$29
li $11,0 # clear borrow bit
.align 4
.Lsub: lw $10,($15)
lw $24,($7)
add $15,4
add $7,4
subu $24,$10,$24 # tp[i]-np[i]
# $1 = borrow out of tp[i]-np[i]
sgtu $1,$24,$10
# Subtract the incoming borrow and detect a borrow from that step too.
subu $10,$24,$11
sgtu $11,$10,$24
sw $10,($4)
# Outgoing borrow = either of the two.
or $11,$1
sltu $1,$15,$20
bnez $1,.Lsub
# Delay slot: advance rp.
add $4,4
subu $11,$25,$11 # handle upmost overflow bit
move $15,$29
sub $4,$9 # restore rp
# $11 and its complement $25 act as AND masks that select the copy
# source without a branch.
not $25,$11
and $5,$11,$29
and $6,$25,$4
or $5,$5,$6 # ap=borrow?tp:rp
.align 4
# Copy the selected vector into rp and overwrite tp with zeros as we go.
.Lcopy: lw $12,($5)
add $5,4
sw $0,($15)
add $15,4
sltu $1,$15,$20
sw $12,($4)
bnez $1,.Lcopy
# Delay slot: advance rp.
add $4,4
# Return value 1 in $2; $4 is set to 1 as well.
li $4,1
li $2,1
.set noreorder
# ---- epilogue: drop tp, restore callee-saved registers ----
move $29,$30
lw $30,(14-1)*4($29)
lw $23,(14-2)*4($29)
lw $22,(14-3)*4($29)
lw $21,(14-4)*4($29)
lw $20,(14-5)*4($29)
lw $19,(14-6)*4($29)
lw $18,(14-7)*4($29)
lw $17,(14-8)*4($29)
lw $16,(14-9)*4($29)
jr $31
# Delay slot: release the 14-word frame.
add $29,14*4
.end bn_mul_mont_internal
# Identification string placed in the read-only data section; no code
# in this file references it.
.rdata
.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"