| #! /usr/bin/env perl |
| # Copyright 2021 The OpenSSL Project Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License 2.0 (the "License"). You may not use |
| # this file except in compliance with the License. You can obtain a copy |
| # in the file LICENSE in the source distribution or at |
| # https://www.openssl.org/source/license.html |
| # |
| # This module implements support for Armv8 SM3 instructions |
| |
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator: first in the directory of this script,
# then in ../../perlasm relative to it.  $dir keeps the trailing path
# separator, so it can be prepended directly.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped into arm-xlate.pl, which
# is started with $flavour and $output as its arguments.  The translator path
# is quoted so that a source tree living in a directory whose name contains
# spaces is still passed to the shell as a single word.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
| |
# Message expanding:
# Wj <- P1(W[j-16]^W[j-9]^(W[j-3]<<<15))^(W[j-13]<<<7)^W[j-6]
# Input: s0, s1, s2, s3
#   s0 = w0  | w1  | w2  | w3
#   s1 = w4  | w5  | w6  | w7
#   s2 = w8  | w9  | w10 | w11
#   s3 = w12 | w13 | w14 | w15
# Output: s4
# Scratch: vtmp1, vtmp2 (both are overwritten by the emitted sequence)
#
# All arguments are vector register names without an arrangement suffix
# (e.g. "v4").  The instruction sequence is appended to the global $code.
#
# The sub is declared without a prototype: it takes seven arguments, and an
# empty prototype would make every call not written in the &name(...) form a
# compile-time error.
sub msg_exp {
    my ($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2) = @_;

    $code.=<<___;
// s4 = w7 | w8 | w9 | w10
ext $s4.16b, $s1.16b, $s2.16b, #12
// vtmp1 = w3 | w4 | w5 | w6
ext $vtmp1.16b, $s0.16b, $s1.16b, #12
// vtmp2 = w10 | w11 | w12 | w13
ext $vtmp2.16b, $s2.16b, $s3.16b, #8
sm3partw1 $s4.4s, $s0.4s, $s3.4s
sm3partw2 $s4.4s, $vtmp2.4s, $vtmp1.4s
___
}
| |
# A round of the compression function
# Input:
#   ab      - "a" or "b", selects sm3tt1a/sm3tt2a or sm3tt1b/sm3tt2b
#   vstate0 - vstate1, hold the digest state (A - H)
#   vconst0 - vconst1, used alternately to hold Tj <<< j: vconst0 is the
#             constant consumed by this round, vconst1 receives vconst0
#             rotated left by one bit (shl/sri pair) for the following round
#   vtmp    - temporary register, receives the sm3ss1 result
#   vw      - for sm3tt1ab, vw = s0 eor s1
#   s0      - for sm3tt2ab, just be s0
#   i       - lane index (0..3), chooses wj' or wj from vw and s0
#
# All register arguments are vector register names without an arrangement
# suffix.  The instruction sequence is appended to the global $code.
#
# The sub is declared without a prototype: it takes nine arguments, and an
# empty prototype would make every call not written in the &name(...) form a
# compile-time error.
sub round {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp, $vw, $s0, $i) = @_;

    $code.=<<___;
sm3ss1 $vtmp.4s, $vstate0.4s, $vconst0.4s, $vstate1.4s
shl $vconst1.4s, $vconst0.4s, #1
sri $vconst1.4s, $vconst0.4s, #31
sm3tt1$ab $vstate0.4s, $vtmp.4s, $vw.4s[$i]
sm3tt2$ab $vstate1.4s, $vtmp.4s, $s0.4s[$i]
___
}
| |
# Four consecutive rounds of the compression function, optionally preceded by
# one step of message expansion.
# Input:
#   ab      - "a" or "b", forwarded to round() to select the sm3tt variants
#   vstate0 - vstate1, hold the digest state
#   vconst0 - vconst1, round constant registers; round() is called with the
#             pair swapped on every other round, so after the four rounds the
#             current constant is in vconst0 again
#   vtmp1   - scratch; holds s0 eor s1 for the four rounds
#   vtmp2   - scratch; temporary of each round
#   s0, s1  - message words consumed by the four rounds
#   s2 - s4 - optional; when s4 is given, msg_exp() is emitted first with
#             s0 - s3 as input and s4 as output
#
# The instruction sequence is appended to the global $code.
#
# The sub is declared without a prototype: it takes up to twelve arguments,
# and an empty prototype would make every call not written in the &name(...)
# form a compile-time error.  The helpers are still invoked as &name(...) so
# that the calls are valid whether or not the helpers carry a prototype.
sub qround {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp1, $vtmp2,
        $s0, $s1, $s2, $s3, $s4) = @_;

    # message expansion is requested by passing the destination register
    &msg_exp($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2) if ($s4);

    $code.=<<___;
eor $vtmp1.16b, $s0.16b, $s1.16b
___

    for my $i (0 .. 3) {
        # even rounds read vconst0 and write vconst1, odd rounds the reverse
        my @tj = $i % 2 ? ($vconst1, $vconst0) : ($vconst0, $vconst1);
        &round($ab, $vstate0, $vstate1, @tj, $vtmp2, $vtmp1, $s0, $i);
    }
}
| |
# Common prologue of the generated assembly file.
$code=<<___;
#include "arm_arch.h"
.arch armv8.2-a
.text
___

{
    # Register allocation for the generated routine.
    my ($pstate, $pdata, $num) = ("x0", "x1", "w2");    # arguments
    my $constaddr = "x8";                               # address of .Tj
    my ($state1, $state2) = ("v5", "v6");               # working state
    my ($bkstate1, $bkstate2) = ("v18", "v19");         # state at block entry
    my ($sconst1, $sconst2) = ("s16", "s17");           # scalar views of ...
    my ($vconst1, $vconst2) = ("v16", "v17");           # ... the two constants
    my ($vconst_tmp1, $vconst_tmp2) = ("v20", "v21");   # per-round constants
    my ($vtmp1, $vtmp2) = ("v22", "v23");               # scratch
    my ($s0, $s1, $s2, $s3, $s4) = map("v$_", (0..4));  # message words

    # void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
    #
    # Function entry and the top of the per-block loop.  The rev64/ext pair
    # reverses the order of the four 32-bit words in each state vector.  The
    # final ext rotates the first constant by four bytes into the working
    # constant register.
    $code.=<<___;
.globl ossl_hwsm3_block_data_order
.type ossl_hwsm3_block_data_order,%function
.align 5
ossl_hwsm3_block_data_order:
AARCH64_VALID_CALL_TARGET
// load state
ld1 {$state1.4s-$state2.4s}, [$pstate]
rev64 $state1.4s, $state1.4s
rev64 $state2.4s, $state2.4s
ext $state1.16b, $state1.16b, $state1.16b, #8
ext $state2.16b, $state2.16b, $state2.16b, #8

adr $constaddr, .Tj
ldp $sconst1, $sconst2, [$constaddr]

.Loop:
// load input
ld1 {$s0.16b-$s3.16b}, [$pdata], #64
sub $num, $num, #1

mov $bkstate1.16b, $state1.16b
mov $bkstate2.16b, $state2.16b

#ifndef __ARMEB__
rev32 $s0.16b, $s0.16b
rev32 $s1.16b, $s1.16b
rev32 $s2.16b, $s2.16b
rev32 $s3.16b, $s3.16b
#endif

ext $vconst_tmp1.16b, $vconst1.16b, $vconst1.16b, #4
___

    # Sixteen quad-rounds.  The five message registers are used as a rotating
    # window: quad-round q starts at register (q mod 5) and hands the next
    # four registers to qround() in order.
    my @msg = ($s0, $s1, $s2, $s3, $s4);
    my @fixed = ($state1, $state2, $vconst_tmp1, $vconst_tmp2, $vtmp1, $vtmp2);

    for my $q (0 .. 15) {
        if ($q == 4) {
            # from the fifth quad-round on the second constant is used
            $code.=<<___;
ext $vconst_tmp1.16b, $vconst2.16b, $vconst2.16b, #4
___
        }

        my @window = map { $msg[($q + $_) % 5] } (0 .. 4);
        # the last three quad-rounds get only two message registers, so
        # qround() emits no message expansion for them
        splice(@window, 2) if ($q >= 13);

        # first four quad-rounds use the "a" instruction forms, the rest "b"
        &qround($q < 4 ? "a" : "b", @fixed, @window);
    }

    # Feed-forward with the saved state, loop control, epilogue and the
    # constant pool.
    $code.=<<___;
eor $state1.16b, $state1.16b, $bkstate1.16b
eor $state2.16b, $state2.16b, $bkstate2.16b

// any remained blocks?
cbnz $num, .Loop

// save state
rev64 $state1.4s, $state1.4s
rev64 $state2.4s, $state2.4s
ext $state1.16b, $state1.16b, $state1.16b, #8
ext $state2.16b, $state2.16b, $state2.16b, #8
st1 {$state1.4s-$state2.4s}, [$pstate]
ret
.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order

.align 3
.Tj:
.word 0x79cc4519, 0x9d8a7a87
___
}
| |
#########################################
# Base encodings of the SM3 instructions, keyed by mnemonic.  The encoder
# subs below OR the register numbers (and, for sm3tt*, the lane index) into
# these values.

# sm3partw1 / sm3partw2: three vector register operands
my %sm3partopcode = (
    sm3partw1 => 0xce60c000,
    sm3partw2 => 0xce60c400,
);

# sm3ss1: four vector register operands
my %sm3ss1opcode = (
    sm3ss1 => 0xce400000,
);

# sm3tt1a / sm3tt1b / sm3tt2a / sm3tt2b: three vector register operands,
# the last one indexed by lane
my %sm3ttopcode = (
    sm3tt1a => 0xce408000,
    sm3tt1b => 0xce408400,
    sm3tt2a => 0xce408800,
    sm3tt2b => 0xce408c00,
);
| |
# Encode a sm3partw1/sm3partw2 instruction as a raw .inst directive.
#   $mnemonic - instruction name, used as key into %sm3partopcode
#   $arg      - operand text: three q/v registers separated by commas; any
#               arrangement suffix after the register number is ignored
# The register numbers are placed at bit 0, bit 5 and bit 16 of the opcode.
# Returns ".inst\t0x<8 hex digits>\t//<mnemonic> <operands>".
# Dies when the operands cannot be parsed; returning an empty string here
# would make the caller's s///e silently delete the instruction from the
# generated assembly.
sub unsm3part {
    my ($mnemonic,$arg)=@_;

    my ($rd,$rn,$rm) =
        $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/
        or die "can't parse operands of $mnemonic: $arg";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3partopcode{$mnemonic}|$rd|($rn<<5)|($rm<<16),
                   $mnemonic,$arg;
}
| |
# Encode a sm3ss1 instruction as a raw .inst directive.
#   $mnemonic - instruction name, used as key into %sm3ss1opcode
#   $arg      - operand text: four q/v registers separated by commas; any
#               arrangement suffix after the register number is ignored
# The register numbers are placed, in operand order, at bit 0, bit 5, bit 16
# and bit 10 of the opcode.
# Returns ".inst\t0x<8 hex digits>\t//<mnemonic> <operands>".
# Dies when the operands cannot be parsed; returning an empty string here
# would make the caller's s///e silently delete the instruction from the
# generated assembly.
sub unsm3ss1 {
    my ($mnemonic,$arg)=@_;

    my ($rd,$rn,$rm,$ra) =
        $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/
        or die "can't parse operands of $mnemonic: $arg";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3ss1opcode{$mnemonic}|$rd|($rn<<5)|($rm<<16)|($ra<<10),
                   $mnemonic,$arg;
}
| |
# Encode a sm3tt1a/sm3tt1b/sm3tt2a/sm3tt2b instruction as a raw .inst
# directive.
#   $mnemonic - instruction name, used as key into %sm3ttopcode
#   $arg      - operand text: three q/v registers separated by commas, the
#               last one followed by a lane index "[0]" .. "[3]"; any
#               arrangement suffix after the register number is ignored
# The register numbers are placed at bit 0, bit 5 and bit 16 of the opcode,
# the lane index at bit 12.
# Returns ".inst\t0x<8 hex digits>\t//<mnemonic> <operands>".
# Dies when the operands cannot be parsed (including a lane index outside
# 0..3); returning an empty string here would make the caller's s///e
# silently delete the instruction from the generated assembly.
sub unsm3tt {
    my ($mnemonic,$arg)=@_;

    my ($rd,$rn,$rm,$lane) =
        $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*\[([0-3])\]/
        or die "can't parse operands of $mnemonic: $arg";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3ttopcode{$mnemonic}|$rd|($rn<<5)|($rm<<16)|($lane<<12),
                   $mnemonic,$arg;
}
| |
# Copy the leading comment block of this script into the output, rewriting a
# leading '#' as '//'.  The '#!' line is skipped and the copy stops at the
# first line that is neither a comment nor empty.
# The script is opened with the three-argument form and a lexical handle so
# that its path is never interpreted as an open mode.  Failure to read it
# only costs the header, so it is reported but does not stop generation.
if (open my $self, '<', $0) {
    while (<$self>) {
        next if (/^#!/);
        last if (!s/^#/\/\// and !/^$/);
        print;
    }
    close $self;
} else {
    warn "can't read $0, header comment omitted: $!";
}

# Emit the generated code line by line.
foreach (split("\n",$code)) {
    # evaluate Perl expressions embedded in backticks
    s/\`([^\`]*)\`/eval($1)/ge;

    # replace the SM3 mnemonics by their .inst encodings
    s/\b(sm3partw[1-2])\s+([qv].*)/unsm3part($1,$2)/ge;
    s/\b(sm3ss1)\s+([qv].*)/unsm3ss1($1,$2)/ge;
    s/\b(sm3tt[1-2][a-b])\s+([qv].*)/unsm3tt($1,$2)/ge;
    print $_,"\n";
}

# STDOUT is a pipe to the translator; errors surface when it is closed.
close STDOUT or die "error closing STDOUT: $!";