| #!/usr/local/bin/perl |
| # I have this in perl so I can use more useful register names and then convert |
| # them into alpha registers. |
| # |
| |
# Main program: fetch the assembler template, translate the symbolic register
# names it uses into Alpha integer register numbers, and print the result.
{
    # Strictures are confined to this block so they do not impose
    # 'strict vars' on code elsewhere in the file.
    use strict;
    use warnings;

    # Symbolic register name => Alpha integer register number.
    my %register_for = (
        CC => 0,
        R1 => 1,  R2 => 2,  R3 => 3,  R4 => 4,
        L1 => 5,  L2 => 6,  L3 => 7,  L4 => 8,
        O1 => 22, O2 => 23, O3 => 24, O4 => 25,
        A1 => 20, A2 => 21, A3 => 27, A4 => 28,
    );

    my $names = join '|', sort keys %register_for;
    my $asm   = data();

    # One pass over the text.  A name is rewritten only when it is written as
    # a register operand ("$A1"), i.e. preceded by '$' and followed by a word
    # boundary, so the same letters appearing inside comments, labels or
    # symbol names are left untouched.
    $asm =~ s/\$($names)\b/\$$register_for{$1}/g;

    print $asm;
}
| |
# data() - return the DEC Alpha assembler template as a single string.
#
# Takes no arguments.  The returned text holds the assembler source for
# bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_add_words, bn_sub_words
# and bn_div_words.  The hand-written routines refer to registers by the
# symbolic names $CC, $R1-$R4, $L1-$L4, $O1-$O4 and $A1-$A4; the main program
# translates those into register numbers before printing.  The heredoc is
# single-quoted, so nothing in it is interpolated by Perl.
#
# Maintenance notes:
#   * bn_add_words, single-word tail loop ($945): the overflow test for a+b
#     (cmpult $R1,$L1,$O1) must run BEFORE the incoming carry is added to
#     $R1, exactly as the unrolled loop at $901 does.  Testing after the
#     carry has been added yields a carry of 2 for a=2^64-1, b=0, carry=1 and
#     a carry of 0 for a=b=2^64-1, carry=1.
#   * bn_sub_words: the unconditional "br $800" sends every call through the
#     single-word loop at $845, so the 4-way unrolled body at $801 is never
#     executed (that body still consists of addq instructions).
#   * NOTE(review): bn_sqr_words uses the two-operand form "subq $18,4";
#     presumably shorthand for "subq $18,4,$18" - confirm against the
#     assembler in use.
sub data
{
    return <<'EOF';

# DEC Alpha assember
# The bn_div_words is actually gcc output but the other parts are hand done.
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
# bn_div_words.
# I've gone back and re-done most of routines.
# The key thing to remeber for the 164 CPU is that while a
# multiply operation takes 8 cycles, another one can only be issued
# after 4 cycles have elapsed. I've done modification to help
# improve this. Also, normally, a ld instruction will not be available
# for about 3 cycles.
.file 1 "bn_asm.c"
.set noat
gcc2_compiled.:
__gnu_compiled_c:
.text
.align 3
.globl bn_mul_add_words
.ent bn_mul_add_words
bn_mul_add_words:
bn_mul_add_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
subq $18,4,$18
bis $31,$31,$CC
blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
ldq $A1,0($17) # 1 1
ldq $R1,0($16) # 1 1
.align 3
$42:
mulq $A1,$19,$L1 # 1 2 1 ######
ldq $A2,8($17) # 2 1
ldq $R2,8($16) # 2 1
umulh $A1,$19,$A1 # 1 2 ######
ldq $A3,16($17) # 3 1
ldq $R3,16($16) # 3 1
mulq $A2,$19,$L2 # 2 2 1 ######
ldq $A4,24($17) # 4 1
addq $R1,$L1,$R1 # 1 2 2
ldq $R4,24($16) # 4 1
umulh $A2,$19,$A2 # 2 2 ######
cmpult $R1,$L1,$O1 # 1 2 3 1
addq $A1,$O1,$A1 # 1 3 1
addq $R1,$CC,$R1 # 1 2 3 1
mulq $A3,$19,$L3 # 3 2 1 ######
cmpult $R1,$CC,$CC # 1 2 3 2
addq $R2,$L2,$R2 # 2 2 2
addq $A1,$CC,$CC # 1 3 2
cmpult $R2,$L2,$O2 # 2 2 3 1
addq $A2,$O2,$A2 # 2 3 1
umulh $A3,$19,$A3 # 3 2 ######
addq $R2,$CC,$R2 # 2 2 3 1
cmpult $R2,$CC,$CC # 2 2 3 2
subq $18,4,$18
mulq $A4,$19,$L4 # 4 2 1 ######
addq $A2,$CC,$CC # 2 3 2
addq $R3,$L3,$R3 # 3 2 2
addq $16,32,$16
cmpult $R3,$L3,$O3 # 3 2 3 1
stq $R1,-32($16) # 1 2 4
umulh $A4,$19,$A4 # 4 2 ######
addq $A3,$O3,$A3 # 3 3 1
addq $R3,$CC,$R3 # 3 2 3 1
stq $R2,-24($16) # 2 2 4
cmpult $R3,$CC,$CC # 3 2 3 2
stq $R3,-16($16) # 3 2 4
addq $R4,$L4,$R4 # 4 2 2
addq $A3,$CC,$CC # 3 3 2
cmpult $R4,$L4,$O4 # 4 2 3 1
addq $17,32,$17
addq $A4,$O4,$A4 # 4 3 1
addq $R4,$CC,$R4 # 4 2 3 1
cmpult $R4,$CC,$CC # 4 2 3 2
stq $R4,-8($16) # 4 2 4
addq $A4,$CC,$CC # 4 3 2
blt $18,$43

ldq $A1,0($17) # 1 1
ldq $R1,0($16) # 1 1

br $42

.align 4
$45:
ldq $A1,0($17) # 4 1
ldq $R1,0($16) # 4 1
mulq $A1,$19,$L1 # 4 2 1
subq $18,1,$18
addq $16,8,$16
addq $17,8,$17
umulh $A1,$19,$A1 # 4 2
addq $R1,$L1,$R1 # 4 2 2
cmpult $R1,$L1,$O1 # 4 2 3 1
addq $A1,$O1,$A1 # 4 3 1
addq $R1,$CC,$R1 # 4 2 3 1
cmpult $R1,$CC,$CC # 4 2 3 2
addq $A1,$CC,$CC # 4 3 2
stq $R1,-8($16) # 4 2 4
bgt $18,$45
ret $31,($26),1 # else exit

.align 4
$43:
addq $18,4,$18
bgt $18,$45 # goto tail code
ret $31,($26),1 # else exit

.end bn_mul_add_words
.align 3
.globl bn_mul_words
.ent bn_mul_words
bn_mul_words:
bn_mul_words..ng:
.frame $30,0,$26,0
.prologue 0
.align 5
subq $18,4,$18
bis $31,$31,$CC
blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
ldq $A1,0($17) # 1 1
.align 3
$142:

mulq $A1,$19,$L1 # 1 2 1 #####
ldq $A2,8($17) # 2 1
ldq $A3,16($17) # 3 1
umulh $A1,$19,$A1 # 1 2 #####
ldq $A4,24($17) # 4 1
mulq $A2,$19,$L2 # 2 2 1 #####
addq $L1,$CC,$L1 # 1 2 3 1
subq $18,4,$18
cmpult $L1,$CC,$CC # 1 2 3 2
umulh $A2,$19,$A2 # 2 2 #####
addq $A1,$CC,$CC # 1 3 2
addq $17,32,$17
addq $L2,$CC,$L2 # 2 2 3 1
mulq $A3,$19,$L3 # 3 2 1 #####
cmpult $L2,$CC,$CC # 2 2 3 2
addq $A2,$CC,$CC # 2 3 2
addq $16,32,$16
umulh $A3,$19,$A3 # 3 2 #####
stq $L1,-32($16) # 1 2 4
mulq $A4,$19,$L4 # 4 2 1 #####
addq $L3,$CC,$L3 # 3 2 3 1
stq $L2,-24($16) # 2 2 4
cmpult $L3,$CC,$CC # 3 2 3 2
umulh $A4,$19,$A4 # 4 2 #####
addq $A3,$CC,$CC # 3 3 2
stq $L3,-16($16) # 3 2 4
addq $L4,$CC,$L4 # 4 2 3 1
cmpult $L4,$CC,$CC # 4 2 3 2

addq $A4,$CC,$CC # 4 3 2

stq $L4,-8($16) # 4 2 4

blt $18,$143

ldq $A1,0($17) # 1 1

br $142

.align 4
$145:
ldq $A1,0($17) # 4 1
mulq $A1,$19,$L1 # 4 2 1
subq $18,1,$18
umulh $A1,$19,$A1 # 4 2
addq $L1,$CC,$L1 # 4 2 3 1
addq $16,8,$16
cmpult $L1,$CC,$CC # 4 2 3 2
addq $17,8,$17
addq $A1,$CC,$CC # 4 3 2
stq $L1,-8($16) # 4 2 4

bgt $18,$145
ret $31,($26),1 # else exit

.align 4
$143:
addq $18,4,$18
bgt $18,$145 # goto tail code
ret $31,($26),1 # else exit

.end bn_mul_words
.align 3
.globl bn_sqr_words
.ent bn_sqr_words
bn_sqr_words:
bn_sqr_words..ng:
.frame $30,0,$26,0
.prologue 0

subq $18,4,$18
blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
ldq $A1,0($17) # 1 1
.align 3
$542:
mulq $A1,$A1,$L1 ######
ldq $A2,8($17) # 1 1
subq $18,4
umulh $A1,$A1,$R1 ######
ldq $A3,16($17) # 1 1
mulq $A2,$A2,$L2 ######
ldq $A4,24($17) # 1 1
stq $L1,0($16) # r[0]
umulh $A2,$A2,$R2 ######
stq $R1,8($16) # r[1]
mulq $A3,$A3,$L3 ######
stq $L2,16($16) # r[0]
umulh $A3,$A3,$R3 ######
stq $R2,24($16) # r[1]
mulq $A4,$A4,$L4 ######
stq $L3,32($16) # r[0]
umulh $A4,$A4,$R4 ######
stq $R3,40($16) # r[1]

addq $16,64,$16
addq $17,32,$17
stq $L4,-16($16) # r[0]
stq $R4,-8($16) # r[1]

blt $18,$543
ldq $A1,0($17) # 1 1
br $542

$442:
ldq $A1,0($17) # a[0]
mulq $A1,$A1,$L1 # a[0]*w low part r2
addq $16,16,$16
addq $17,8,$17
subq $18,1,$18
umulh $A1,$A1,$R1 # a[0]*w high part r3
stq $L1,-16($16) # r[0]
stq $R1,-8($16) # r[1]

bgt $18,$442
ret $31,($26),1 # else exit

.align 4
$543:
addq $18,4,$18
bgt $18,$442 # goto tail code
ret $31,($26),1 # else exit
.end bn_sqr_words

.align 3
.globl bn_add_words
.ent bn_add_words
bn_add_words:
bn_add_words..ng:
.frame $30,0,$26,0
.prologue 0

subq $19,4,$19
bis $31,$31,$CC # carry = 0
blt $19,$900
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
.align 3
$901:
addq $R1,$L1,$R1 # r=a+b;
ldq $L2,8($17) # a[1]
cmpult $R1,$L1,$O1 # did we overflow?
ldq $R2,8($18) # b[1]
addq $R1,$CC,$R1 # c+= overflow
ldq $L3,16($17) # a[2]
cmpult $R1,$CC,$CC # overflow?
ldq $R3,16($18) # b[2]
addq $CC,$O1,$CC
ldq $L4,24($17) # a[3]
addq $R2,$L2,$R2 # r=a+b;
ldq $R4,24($18) # b[3]
cmpult $R2,$L2,$O2 # did we overflow?
addq $R3,$L3,$R3 # r=a+b;
addq $R2,$CC,$R2 # c+= overflow
cmpult $R3,$L3,$O3 # did we overflow?
cmpult $R2,$CC,$CC # overflow?
addq $R4,$L4,$R4 # r=a+b;
addq $CC,$O2,$CC
cmpult $R4,$L4,$O4 # did we overflow?
addq $R3,$CC,$R3 # c+= overflow
stq $R1,0($16) # r[0]=c
cmpult $R3,$CC,$CC # overflow?
stq $R2,8($16) # r[1]=c
addq $CC,$O3,$CC
stq $R3,16($16) # r[2]=c
addq $R4,$CC,$R4 # c+= overflow
subq $19,4,$19 # loop--
cmpult $R4,$CC,$CC # overflow?
addq $17,32,$17 # a++
addq $CC,$O4,$CC
stq $R4,24($16) # r[3]=c
addq $18,32,$18 # b++
addq $16,32,$16 # r++

blt $19,$900
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
br $901
.align 4
$945:
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
addq $R1,$L1,$R1 # r=a+b;
subq $19,1,$19 # loop--
cmpult $R1,$L1,$O1 # did we overflow?
addq $17,8,$17 # a++
addq $R1,$CC,$R1 # c+= overflow
cmpult $R1,$CC,$CC # overflow?
addq $18,8,$18 # b++
stq $R1,0($16) # r[0]=c
addq $CC,$O1,$CC
addq $16,8,$16 # r++

bgt $19,$945
ret $31,($26),1 # else exit

$900:
addq $19,4,$19
bgt $19,$945 # goto tail code
ret $31,($26),1 # else exit
.end bn_add_words

.align 3
.globl bn_sub_words
.ent bn_sub_words
bn_sub_words:
bn_sub_words..ng:
.frame $30,0,$26,0
.prologue 0

subq $19,4,$19
bis $31,$31,$CC # carry = 0
br $800
blt $19,$800
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
.align 3
$801:
addq $R1,$L1,$R1 # r=a+b;
ldq $L2,8($17) # a[1]
cmpult $R1,$L1,$O1 # did we overflow?
ldq $R2,8($18) # b[1]
addq $R1,$CC,$R1 # c+= overflow
ldq $L3,16($17) # a[2]
cmpult $R1,$CC,$CC # overflow?
ldq $R3,16($18) # b[2]
addq $CC,$O1,$CC
ldq $L4,24($17) # a[3]
addq $R2,$L2,$R2 # r=a+b;
ldq $R4,24($18) # b[3]
cmpult $R2,$L2,$O2 # did we overflow?
addq $R3,$L3,$R3 # r=a+b;
addq $R2,$CC,$R2 # c+= overflow
cmpult $R3,$L3,$O3 # did we overflow?
cmpult $R2,$CC,$CC # overflow?
addq $R4,$L4,$R4 # r=a+b;
addq $CC,$O2,$CC
cmpult $R4,$L4,$O4 # did we overflow?
addq $R3,$CC,$R3 # c+= overflow
stq $R1,0($16) # r[0]=c
cmpult $R3,$CC,$CC # overflow?
stq $R2,8($16) # r[1]=c
addq $CC,$O3,$CC
stq $R3,16($16) # r[2]=c
addq $R4,$CC,$R4 # c+= overflow
subq $19,4,$19 # loop--
cmpult $R4,$CC,$CC # overflow?
addq $17,32,$17 # a++
addq $CC,$O4,$CC
stq $R4,24($16) # r[3]=c
addq $18,32,$18 # b++
addq $16,32,$16 # r++

blt $19,$800
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
br $801
.align 4
$845:
ldq $L1,0($17) # a[0]
ldq $R1,0($18) # b[1]
cmpult $L1,$R1,$O1 # will we borrow?
subq $L1,$R1,$R1 # r=a-b;
subq $19,1,$19 # loop--
cmpult $R1,$CC,$O2 # will we borrow?
subq $R1,$CC,$R1 # c+= overflow
addq $17,8,$17 # a++
addq $18,8,$18 # b++
stq $R1,0($16) # r[0]=c
addq $O2,$O1,$CC
addq $16,8,$16 # r++

bgt $19,$845
ret $31,($26),1 # else exit

$800:
addq $19,4,$19
bgt $19,$845 # goto tail code
ret $31,($26),1 # else exit
.end bn_sub_words

#
# What follows was taken directly from the C compiler with a few
# hacks to redo the lables.
#
.text
.align 3
.globl bn_div_words
.ent bn_div_words
bn_div_words:
ldgp $29,0($27)
bn_div_words..ng:
lda $30,-48($30)
.frame $30,48,$26,0
stq $26,0($30)
stq $9,8($30)
stq $10,16($30)
stq $11,24($30)
stq $12,32($30)
stq $13,40($30)
.mask 0x4003e00,-48
.prologue 1
bis $16,$16,$9
bis $17,$17,$10
bis $18,$18,$11
bis $31,$31,$13
bis $31,2,$12
bne $11,$119
lda $0,-1
br $31,$136
.align 4
$119:
bis $11,$11,$16
jsr $26,BN_num_bits_word
ldgp $29,0($26)
subq $0,64,$1
beq $1,$120
bis $31,1,$1
sll $1,$0,$1
cmpule $9,$1,$1
bne $1,$120
# lda $16,_IO_stderr_
# lda $17,$C32
# bis $0,$0,$18
# jsr $26,fprintf
# ldgp $29,0($26)
jsr $26,abort
ldgp $29,0($26)
.align 4
$120:
bis $31,64,$3
cmpult $9,$11,$2
subq $3,$0,$1
addl $1,$31,$0
subq $9,$11,$1
cmoveq $2,$1,$9
beq $0,$122
zapnot $0,15,$2
subq $3,$0,$1
sll $11,$2,$11
sll $9,$2,$3
srl $10,$1,$1
sll $10,$2,$10
bis $3,$1,$9
$122:
srl $11,32,$5
zapnot $11,15,$6
lda $7,-1
.align 5
$123:
srl $9,32,$1
subq $1,$5,$1
bne $1,$126
zapnot $7,15,$27
br $31,$127
.align 4
$126:
bis $9,$9,$24
bis $5,$5,$25
divqu $24,$25,$27
$127:
srl $10,32,$4
.align 5
$128:
mulq $27,$5,$1
subq $9,$1,$3
zapnot $3,240,$1
bne $1,$129
mulq $6,$27,$2
sll $3,32,$1
addq $1,$4,$1
cmpule $2,$1,$2
bne $2,$129
subq $27,1,$27
br $31,$128
.align 4
$129:
mulq $27,$6,$1
mulq $27,$5,$4
srl $1,32,$3
sll $1,32,$1
addq $4,$3,$4
cmpult $10,$1,$2
subq $10,$1,$10
addq $2,$4,$2
cmpult $9,$2,$1
bis $2,$2,$4
beq $1,$134
addq $9,$11,$9
subq $27,1,$27
$134:
subl $12,1,$12
subq $9,$4,$9
beq $12,$124
sll $27,32,$13
sll $9,32,$2
srl $10,32,$1
sll $10,32,$10
bis $2,$1,$9
br $31,$123
.align 4
$124:
bis $13,$27,$0
$136:
ldq $26,0($30)
ldq $9,8($30)
ldq $10,16($30)
ldq $11,24($30)
ldq $12,32($30)
ldq $13,40($30)
addq $30,48,$30
ret $31,($26),1
.end bn_div_words
EOF
}
| |