#! /usr/bin/env perl
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Command line: [flavour] output
# If the first argument contains a dot it is taken to be the output file
# name and the flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected by a nasm/masm/mingw64 flavour or by
# an output file ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the x86_64-xlate.pl translator next to this script, then in a
# perlasm/ subdirectory of the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# Fail loudly if the pipe cannot be created instead of printing into a dead
# handle.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments under each ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Start-up glue: places a call to OPENSSL_cpuid_setup in the .init section
# and declares the hidden 16-byte common symbol OPENSSL_ia32cap_P.
print<<___;
.extern OPENSSL_cpuid_setup
.hidden OPENSSL_cpuid_setup
.section .init
	call OPENSSL_cpuid_setup

.hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,16,4

.text

___

# OPENSSL_atomic_add(ptr, amount)
# Adds the second argument to the 32-bit value addressed by the first one
# with a lock-prefixed cmpxchg retry loop; returns the new value,
# sign-extended to 64 bits.
print<<___;
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,\@abi-omnipotent
.align 16
OPENSSL_atomic_add:
	movl ($arg1),%eax
.Lspin: leaq ($arg2,%rax),%r8
	.byte 0xf0 # lock
	cmpxchgl %r8d,($arg1)
	jne .Lspin
	movl %r8d,%eax
	.byte 0x48,0x98 # cltq/cdqe
	ret
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc()
# Returns the time-stamp counter as one 64-bit value (%edx:%eax merged
# into %rax).
print<<___;
.globl OPENSSL_rdtsc
.type OPENSSL_rdtsc,\@abi-omnipotent
.align 16
OPENSSL_rdtsc:
	rdtsc
	shl \$32,%rdx
	or %rdx,%rax
	ret
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid(ptr)
# Declared with one argument, which the body addresses through %rdi.  The
# dword at 8(%rdi) receives the CPUID leaf 7 %ebx flags (zero when leaf 7
# is not available), with AVX2/AVX512 bits cleared when XCR0 does not
# report the matching register state.  The return value is
# (%r9d << 32) | %r10d, i.e. the adjusted %ecx:%edx pair of CPUID leaf 1:
# vendor-specific fix-ups of the hyper-threading bit, reserved bits 20 and
# 30 used as Intel markers, the AMD XOP flag merged into bit 11, and AVX,
# FMA and XOP cleared when the OS does not save YMM state.
# %rbx is parked in %r8 for the duration because cpuid overwrites it.
print<<___;
.globl OPENSSL_ia32_cpuid
.type OPENSSL_ia32_cpuid,\@function,1
.align 16
OPENSSL_ia32_cpuid:
.cfi_startproc
	mov %rbx,%r8 # save %rbx
.cfi_register %rbx,%r8

	xor %eax,%eax
	mov %eax,8(%rdi) # clear extended feature flags
	cpuid
	mov %eax,%r11d # max value for standard query level

	xor %eax,%eax
	cmp \$0x756e6547,%ebx # "Genu"
	setne %al
	mov %eax,%r9d
	cmp \$0x49656e69,%edx # "ineI"
	setne %al
	or %eax,%r9d
	cmp \$0x6c65746e,%ecx # "ntel"
	setne %al
	or %eax,%r9d # 0 indicates Intel CPU
	jz .Lintel

	cmp \$0x68747541,%ebx # "Auth"
	setne %al
	mov %eax,%r10d
	cmp \$0x69746E65,%edx # "enti"
	setne %al
	or %eax,%r10d
	cmp \$0x444D4163,%ecx # "cAMD"
	setne %al
	or %eax,%r10d # 0 indicates AMD CPU
	jnz .Lintel

	# AMD specific
	mov \$0x80000000,%eax
	cpuid
	cmp \$0x80000001,%eax
	jb .Lintel
	mov %eax,%r10d
	mov \$0x80000001,%eax
	cpuid
	or %ecx,%r9d
	and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11

	cmp \$0x80000008,%r10d
	jb .Lintel

	mov \$0x80000008,%eax
	cpuid
	movzb %cl,%r10 # number of cores - 1
	inc %r10 # number of cores

	mov \$1,%eax
	cpuid
	bt \$28,%edx # test hyper-threading bit
	jnc .Lgeneric
	shr \$16,%ebx # number of logical processors
	cmp %r10b,%bl
	ja .Lgeneric
	and \$0xefffffff,%edx # ~(1<<28)
	jmp .Lgeneric

.Lintel:
	cmp \$4,%r11d
	mov \$-1,%r10d
	jb .Lnocacheinfo

	mov \$4,%eax
	mov \$0,%ecx # query L1D
	cpuid
	mov %eax,%r10d
	shr \$14,%r10d
	and \$0xfff,%r10d # number of cores -1 per L1D

.Lnocacheinfo:
	mov \$1,%eax
	cpuid
	and \$0xbfefffff,%edx # force reserved bits to 0
	cmp \$0,%r9d
	jne .Lnotintel
	or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs
	and \$15,%ah
	cmp \$15,%ah # examine Family ID
	jne .Lnotintel
	or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt \$28,%edx # test hyper-threading bit
	jnc .Lgeneric
	and \$0xefffffff,%edx # ~(1<<28)
	cmp \$0,%r10d
	je .Lgeneric

	or \$0x10000000,%edx # 1<<28
	shr \$16,%ebx
	cmp \$1,%bl # see if cache is shared
	ja .Lgeneric
	and \$0xefffffff,%edx # ~(1<<28)
.Lgeneric:
	and \$0x00000800,%r9d # isolate AMD XOP flag
	and \$0xfffff7ff,%ecx
	or %ecx,%r9d # merge AMD XOP flag

	mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx

	cmp \$7,%r11d
	jb .Lno_extended_info
	mov \$7,%eax
	xor %ecx,%ecx
	cpuid
	mov %ebx,8(%rdi) # save extended feature flags
.Lno_extended_info:

	bt \$27,%r9d # check OSXSAVE bit
	jnc .Lclear_avx
	xor %ecx,%ecx # XCR0
	.byte 0x0f,0x01,0xd0 # xgetbv
	and \$0xe6,%eax # isolate XMM, YMM and ZMM state support
	cmp \$0xe6,%eax
	je .Ldone
	andl \$0xfffeffff,8(%rdi) # clear AVX512F, ~(1<<16)
	# note that we don't touch other AVX512
	# extensions, because they can be used
	# with YMM (without opmasking though)
	and \$6,%eax # isolate XMM and YMM state support
	cmp \$6,%eax
	je .Ldone
.Lclear_avx:
	mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
	and %eax,%r9d # clear AVX, FMA and AMD XOP bits
	mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and %eax,8(%rdi) # cleax AVX2 and AVX512* bits
.Ldone:
	shl \$32,%r9
	mov %r10d,%eax
	mov %r8,%rbx # restore %rbx
.cfi_restore %rbx
	or %r9,%rax
	ret
.cfi_endproc
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse(ptr, len)
# Stores zeros over len bytes starting at ptr.  Lengths below 15 are cleared
# byte by byte; longer buffers are first advanced byte-wise to an 8-byte
# boundary, then cleared 8 bytes per store, and any remainder is finished
# by the byte loop.  A zero length returns immediately.
print<<___;
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,\@abi-omnipotent
.align 16
OPENSSL_cleanse:
	xor %rax,%rax
	cmp \$15,$arg2
	jae .Lot
	cmp \$0,$arg2
	je .Lret
.Little:
	mov %al,($arg1)
	sub \$1,$arg2
	lea 1($arg1),$arg1
	jnz .Little
.Lret:
	ret
.align 16
.Lot:
	test \$7,$arg1
	jz .Laligned
	mov %al,($arg1)
	lea -1($arg2),$arg2
	lea 1($arg1),$arg1
	jmp .Lot
.Laligned:
	mov %rax,($arg1)
	lea -8($arg2),$arg2
	test \$-8,$arg2
	lea 8($arg1),$arg1
	jnz .Laligned
	cmp \$0,$arg2
	jne .Little
	ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse

___

# CRYPTO_memcmp(a, b, len)
# XORs the two buffers byte by byte and ORs every difference into %al; the
# loop always runs over all len bytes, with no early exit.  Returns 0 when
# the buffers match (or len is 0) and 1 otherwise.
print<<___;
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,\@abi-omnipotent
.align 16
CRYPTO_memcmp:
	xor %rax,%rax
	xor %r10,%r10
	cmp \$0,$arg3
	je .Lno_data
.Loop_cmp:
	mov ($arg1),%r10b
	lea 1($arg1),$arg1
	xor ($arg2),%r10b
	lea 1($arg2),$arg2
	or %r10b,%al
	dec $arg3
	jnz .Loop_cmp
	neg %rax
	shr \$63,%rax
.Lno_data:
	ret
.size CRYPTO_memcmp,.-CRYPTO_memcmp
___

# OPENSSL_wipe_cpu()
# Zeroes a set of XMM and general-purpose registers and returns the address
# 8(%rsp) in %rax.  The register set depends on the target ABI: the Win64
# variant touches only %xmm0-%xmm5 and leaves %rsi/%rdi alone (presumably
# because the remaining ones are callee-saved there -- confirm against the
# Win64 ABI), the Unix variant clears %xmm0-%xmm15 plus %rsi/%rdi as well.
{
    my @xmm_regs = $win64 ? (0 .. 5) : (0 .. 15);
    my @gp_regs  = $win64 ? qw(rcx rdx r8 r9 r10 r11)
                          : qw(rcx rdx rsi rdi r8 r9 r10 r11);

    # One instruction per line, in the same order as the register lists.
    my $wipe_insns = join "\n",
        map("\tpxor %xmm$_,%xmm$_", @xmm_regs),
        map("\txorq %$_,%$_",       @gp_regs);

    print<<___;
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
$wipe_insns
	leaq 8(%rsp),%rax
	ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
# OPENSSL_instrument_bus(out, cnt) and OPENSSL_instrument_bus2(out, cnt, max)
#
# Both routines sample the time-stamp counter in a loop and accumulate the
# tick deltas into an array of 32-bit slots at `out`, flushing the slot's
# cache line (clflush) before every lock-prefixed add.
#
# OPENSSL_instrument_bus writes one delta per slot for cnt slots and
# returns cnt.
#
# OPENSSL_instrument_bus2 advances to the next slot only when the current
# delta differs from the previous one, and stops when either cnt slots have
# been consumed or max samples have been taken.  It returns the number of
# slots it advanced through (original cnt minus remaining cnt).
{
    my $out="%r10";
    my $cnt="%rcx";
    my $max="%r11";
    my $lasttick="%r8d";
    my $lastdiff="%r9d";
    # Scratch slot for the original $cnt.  The routine has no stack frame,
    # so on Win64 use the caller-provided home space above the return
    # address, and on Unix the red zone below %rsp.  The original tested the
    # bareword `win64` (always true), which made Unix builds write to
    # 8(%rsp), i.e. into the caller's frame.
    my $redzone=$win64?8:-8;

print<<___;
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,\@abi-omnipotent
.align 16
OPENSSL_instrument_bus:
	mov $arg1,$out # tribute to Win64
	mov $arg2,$cnt
	mov $arg2,$max

	rdtsc # collect 1st tick
	mov %eax,$lasttick # lasttick = tick
	mov \$0,$lastdiff # lastdiff = 0
	clflush ($out)
	.byte 0xf0 # lock
	add $lastdiff,($out)
	jmp .Loop
.align 16
.Loop: rdtsc
	mov %eax,%edx
	sub $lasttick,%eax
	mov %edx,$lasttick
	mov %eax,$lastdiff
	clflush ($out)
	.byte 0xf0 # lock
	add %eax,($out)
	lea 4($out),$out
	sub \$1,$cnt
	jnz .Loop

	mov $max,%rax
	ret
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,\@abi-omnipotent
.align 16
OPENSSL_instrument_bus2:
	mov $arg1,$out # tribute to Win64
	mov $arg2,$cnt
	mov $arg3,$max
	mov $cnt,$redzone(%rsp)

	rdtsc # collect 1st tick
	mov %eax,$lasttick # lasttick = tick
	mov \$0,$lastdiff # lastdiff = 0

	clflush ($out)
	.byte 0xf0 # lock
	add $lastdiff,($out)

	rdtsc # collect 1st diff
	mov %eax,%edx
	sub $lasttick,%eax # diff
	mov %edx,$lasttick # lasttick = tick
	mov %eax,$lastdiff # lastdiff = diff
.Loop2:
	clflush ($out)
	.byte 0xf0 # lock
	add %eax,($out) # accumulate diff

	sub \$1,$max
	jz .Ldone2

	rdtsc
	mov %eax,%edx
	sub $lasttick,%eax # diff
	mov %edx,$lasttick # lasttick = tick
	cmp $lastdiff,%eax
	mov %eax,$lastdiff # lastdiff = diff
	mov \$0,%edx
	setne %dl
	sub %rdx,$cnt # conditional --$cnt
	lea ($out,%rdx,4),$out # conditional ++$out
	jnz .Loop2

.Ldone2:
	mov $redzone(%rsp),%rax
	sub $cnt,%rax
	ret
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# gen_random($rdop)
#
# Prints two routines built around the hardware random instruction named by
# $rdop (the callers in this file pass "rdrand" and "rdseed"):
#
#   OPENSSL_ia32_<rdop>()
#       Tries the instruction up to 8 times and returns the value left in
#       %rax.  If that value is zero, the remaining retry counter (%rcx) is
#       returned instead.
#
#   OPENSSL_ia32_<rdop>_bytes(buf, len)
#       Fills buf with len bytes, 8 at a time, allowing up to 8 attempts for
#       every 64-bit word.  A final partial word is written byte by byte
#       from the low end of the last value obtained.  Returns the number of
#       bytes actually written, which is less than len if the instruction
#       kept failing.
#
# The code is written to the currently selected output handle; nothing is
# returned to the Perl caller.
sub gen_random {
my $rdop = shift;
print<<___;
.globl OPENSSL_ia32_${rdop}
.type OPENSSL_ia32_${rdop},\@abi-omnipotent
.align 16
OPENSSL_ia32_${rdop}:
	mov \$8,%ecx
.Loop_${rdop}:
	${rdop} %rax
	jc .Lbreak_${rdop}
	loop .Loop_${rdop}
.Lbreak_${rdop}:
	cmp \$0,%rax
	cmove %rcx,%rax
	ret
.size OPENSSL_ia32_${rdop},.-OPENSSL_ia32_${rdop}

.globl OPENSSL_ia32_${rdop}_bytes
.type OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align 16
OPENSSL_ia32_${rdop}_bytes:
	xor %rax, %rax # return value
	cmp \$0,$arg2
	je .Ldone_${rdop}_bytes

	mov \$8,%r11
.Loop_${rdop}_bytes:
	${rdop} %r10
	jc .Lbreak_${rdop}_bytes
	dec %r11
	jnz .Loop_${rdop}_bytes
	jmp .Ldone_${rdop}_bytes

.align 16
.Lbreak_${rdop}_bytes:
	cmp \$8,$arg2
	jb .Ltail_${rdop}_bytes
	mov %r10,($arg1)
	lea 8($arg1),$arg1
	add \$8,%rax
	sub \$8,$arg2
	jz .Ldone_${rdop}_bytes
	mov \$8,%r11
	jmp .Loop_${rdop}_bytes

.align 16
.Ltail_${rdop}_bytes:
	mov %r10b,($arg1)
	lea 1($arg1),$arg1
	inc %rax
	shr \$8,%r10 # expose the next random byte in %r10b
	dec $arg2
	jnz .Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	ret
.size OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
# Emit the RDRAND and RDSEED flavours of the random-number helpers.
gen_random("rdrand");
gen_random("rdseed");

# STDOUT is the pipe to the translator: closing it flushes the generated
# code, and close reports failure if the write failed or the translator
# exited with a non-zero status.  Abort instead of silently leaving a
# truncated or missing output file.
close STDOUT or die "error closing STDOUT: $!";