#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for s390x.
#
# June 2015
#
# ~6.4/2.2 cpb on z10/z196+, >2x improvement over compiler-generated
# code. For older compilers the improvement coefficient is >3x, because
# then base 2^64 and base 2^32 implementations are compared.
#
# On a side note, z13 enables a vector base 2^26 implementation...

# Command line: <flavour> [...] <output-file>
#
# The first argument is the flavour. A flavour matching /3[12]/ selects
# 4-byte save slots and the 32-bit instruction forms; anything else
# selects 8-byte slots and the "g" (64-bit) forms.
$flavour = shift;

if ($flavour =~ /3[12]/) {
    $SIZE_T=4;
    $g="";      # stm, lm, st, l, clr, brct
} else {
    $SIZE_T=8;
    $g="g";     # stmg, lmg, stg, lg, clgr, brctg
}

# Consume the remaining arguments until one looks like a file name with
# an extension. $output is left false when no such argument exists.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually named, and fail
# loudly if it cannot be opened. An unchecked open would let the
# generated code go to the inherited stdout while the script still
# exits successfully.
if ($output) {
    open STDOUT,">",$output or die "can't open $output: $!";
}

$sp="%r15";     # base register for the register save area

# The four argument registers, %r2..%r5 in order.
my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5));

{
# poly1305_init: the emitted routine zeroes the three 64-bit hash words
# at offsets 0, 8 and 16 of the context. Unless the key pointer is
# NULL, it then loads the 16-byte key byte-reversed, clamps it with the
# masks 0x0ffffffc0fffffff / 0x0ffffffc0ffffffc and stores the two
# halves at offsets 32 and 40. It always returns 0 in %r2.
#
# Only %r0, %r1, %r4 and %r5 are used as scratch, and nothing is saved
# on the stack.
my ($msk0,$msk1) = map { "%r$_" } 0..1;     # zero source, then clamp masks
my ($key0,$key1) = map { "%r$_" } 4..5;     # low/high 64-bit key halves

$code .= <<"___";
.text

.globl poly1305_init
.type poly1305_init,\@function
.align 16
poly1305_init:
    lghi $msk0,0
    lghi $msk1,-1
    stg $msk0,0($ctx) # zero hash value
    stg $msk0,8($ctx)
    stg $msk0,16($ctx)

    cl${g}r $inp,$msk0
    je .Lno_key

    lrvg $key0,0($inp) # load little-endian key
    lrvg $key1,8($inp)

    nihl $msk1,0xffc0 # 0xffffffc0ffffffff
    srlg $msk0,$msk1,4 # 0x0ffffffc0fffffff
    srlg $msk1,$msk1,4
    nill $msk1,0xfffc # 0x0ffffffc0ffffffc

    ngr $key0,$msk0
    ngr $key1,$msk1

    stg $key0,32($ctx)
    stg $key1,40($ctx)

.Lno_key:
    lghi %r2,0
    br %r14
.size poly1305_init,.-poly1305_init
___
}
{
# poly1305_blocks: the emitted routine takes (ctx, inp, len, padbit) in
# %r2..%r5 and processes len/16 input blocks of 16 bytes, returning at
# once when that count is zero. For each block it adds the
# byte-reversed input and padbit to the hash h0:h1:h2 (context offsets
# 0/8/16), multiplies by the key r0:r1 (offsets 32/40) and reduces.
#
# %r6..%r14 are saved on entry and restored on exit. %r0..%r2 hold r0,
# r1 and s1 = r1 + (r1 >> 2), so $ctx (%r2) is parked in the stack
# frame for the duration of the loop.
my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map("%r$_",(6..14));
my ($r0,$r1,$s1) = map("%r$_",(0..2));

# Block count = len >> 4. The 64-bit "srlg" takes three operands, but
# the 32-bit "srl" used for the 31-bit flavour only has the two-operand
# form, so the instruction text is chosen here rather than through a
# plain ${g} suffix.
my $shr_len = $g ? "srlg $len,$len,4" : "srl $len,4";

$code.=<<___;
.globl poly1305_blocks
.type poly1305_blocks,\@function
.align 16
poly1305_blocks:
    $shr_len # len/16 = number of blocks
    lghi %r0,0
    cl${g}r $len,%r0
    je .Lno_data

    stm${g} %r6,%r14,`6*$SIZE_T`($sp)

    llgfr $padbit,$padbit # clear upper half, needed with
                          # non-64-bit ABI
    lg $r0,32($ctx) # load key
    lg $r1,40($ctx)

    lg $h0,0($ctx) # load hash value
    lg $h1,8($ctx)
    lg $h2,16($ctx)

    st$g $ctx,`2*$SIZE_T`($sp) # off-load $ctx
    srlg $s1,$r1,2
    algr $s1,$r1 # s1 = r1 + r1>>2
    j .Loop

.align 16
.Loop:
    lrvg $d0lo,0($inp) # load little-endian input
    lrvg $d1lo,8($inp)
    la $inp,16($inp)

    algr $d0lo,$h0 # accumulate input
    alcgr $d1lo,$h1

    lgr $h0,$d0lo
    mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo
    lgr $h1,$d1lo
    mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo

    mlgr $t0,$r1 # h0*r1 -> $t0:$h0
    mlgr $t1,$r0 # h1*r0 -> $t1:$h1
    alcgr $h2,$padbit

    algr $d0lo,$d1lo
    lgr $d1lo,$h2
    alcgr $d0hi,$d1hi
    lghi $d1hi,0

    algr $h1,$h0
    alcgr $t1,$t0

    msgr $d1lo,$s1 # h2*s1
    msgr $h2,$r0 # h2*r0

    algr $h1,$d1lo
    alcgr $t1,$d1hi # $d1hi is zero

    algr $h1,$d0hi
    alcgr $h2,$t1

    lghi $h0,-4 # final reduction step
    ngr $h0,$h2
    srlg $t0,$h2,2
    algr $h0,$t0
    lghi $t1,3
    ngr $h2,$t1 # mask first: ngr overwrites the carry

    algr $h0,$d0lo
    alcgr $h1,$d1hi # $d1hi is still zero
    alcgr $h2,$d1hi # propagate the carry out of h1

    brct$g $len,.Loop

    l$g $ctx,`2*$SIZE_T`($sp) # restore $ctx

    stg $h0,0($ctx) # store hash value
    stg $h1,8($ctx)
    stg $h2,16($ctx)

    lm${g} %r6,%r14,`6*$SIZE_T`($sp)
.Lno_data:
    br %r14
.size poly1305_blocks,.-poly1305_blocks
___
}
{
# poly1305_emit: the emitted routine loads the three hash words from
# the context, keeps a copy of the low two, and adds 5 across all three
# with carry. Bits 2 and up of the top word then form an all-ones or
# all-zero mask that selects, per 64-bit word, either the sum or the
# untouched copy. The two 64-bit nonce words are loaded, rotated by 32
# bits and added with carry, and the 128-bit result is stored
# byte-reversed.
#
# The second and third arguments are the tag buffer and the nonce.
my $mac   = $inp;
my $nonce = $len;

# $sum* receive hash+5; $hash* keep the original low two hash words.
# %r6..%r9 are saved and restored around the body.
my ($sum0,$sum1,$sum2,$hash0,$hash1) = map { "%r$_" } 5..9;

$code .= <<"___";
.globl poly1305_emit
.type poly1305_emit,\@function
.align 16
poly1305_emit:
    stm${g} %r6,%r9,`6*$SIZE_T`($sp)

    lg $sum0,0($ctx)
    lg $sum1,8($ctx)
    lg $sum2,16($ctx)

    lghi %r0,5
    lghi %r1,0
    lgr $hash0,$sum0
    lgr $hash1,$sum1

    algr $sum0,%r0 # compare to modulus
    alcgr $sum1,%r1
    alcgr $sum2,%r1

    srlg $sum2,$sum2,2 # did it borrow/carry?
    slgr %r1,$sum2 # 0-$sum2>>2
    lg $sum2,0($nonce) # load nonce
    lghi %r0,-1
    lg $ctx,8($nonce)
    xgr %r0,%r1 # ~%r1

    ngr $sum0,%r1
    ngr $hash0,%r0
    ngr $sum1,%r1
    ngr $hash1,%r0
    ogr $sum0,$hash0
    rllg $hash0,$sum2,32 # flip nonce words
    ogr $sum1,$hash1
    rllg $hash1,$ctx,32

    algr $sum0,$hash0 # accumulate nonce
    alcgr $sum1,$hash1

    strvg $sum0,0($mac) # write little-endian result
    strvg $sum1,8($mac)

    lm${g} %r6,%r9,`6*$SIZE_T`($sp)
    br %r14
.size poly1305_emit,.-poly1305_emit

.string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
}

# Replace every backtick-quoted expression in the generated text (the
# save-area offsets such as 6*SIZE_T) with its evaluated value.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;

# Buffered write errors (for example a full disk) only surface when the
# handle is closed. Fail here so a truncated assembly file is never
# mistaken for a successful run.
close STDOUT or die "error closing STDOUT: $!";