#! /usr/bin/env perl
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Command line: [flavour] output
# A first argument containing a dot is taken to be the output file name,
# in which case no flavour was given.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected by flavour or by an .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the translator next to this script or in its perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed below is piped through the translator.  Fail loudly
# if the pipe cannot be started instead of silently producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments in the selected ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
26
print<<___;
# Emit a call to OPENSSL_cpuid_setup into the .init section.
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

# OPENSSL_ia32cap_P: hidden common symbol, 16 bytes, alignment 4.
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text
37
# OPENSSL_atomic_add(ptr, amount)
#   In:  first argument  = address of a 32-bit value
#        second argument = amount to add
#   Out: %rax = the updated 32-bit value, sign-extended to 64 bits
#   Uses %r8 as scratch.  Retries the locked compare-exchange until the
#   value in memory matches the value the sum was computed from.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# current value
.Lspin:
	leaq	($arg2,%rax),%r8	# candidate = current + amount
	.byte	0xf0			# lock prefix for the next instruction
	cmpxchgl	%r8d,($arg1)	# store candidate if memory still equals %eax
	jne	.Lspin			# lost the race; %eax was reloaded, retry
	movl	%r8d,%eax		# return the value that was stored
	.byte	0x48,0x98		# cltq/cdqe: sign-extend %eax into %rax
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
51
# OPENSSL_rdtsc()
#   Out: %rax = 64-bit time-stamp counter, assembled from the %edx:%eax
#        pair returned by rdtsc.  Clobbers %rdx.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx		# move the high half into bits 63..32
	or	%rdx,%rax		# and merge it with the low half
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
61
# OPENSSL_ia32_cpuid(ptr)
#   In:  %rdi = pointer; the 32-bit word at offset 8 receives the
#        extended feature flags (CPUID leaf 7 %ebx, possibly masked).
#   Out: %rax = (adjusted leaf 1 %ecx) << 32 | (adjusted leaf 1 %edx)
#   Register roles inside the routine:
#        %r8   saved copy of %rbx (cpuid overwrites %rbx)
#        %r9d  vendor mismatch flag, later the copy of %ecx
#        %r10d core-count scratch, later the copy of %edx
#        %r11d highest supported standard CPUID leaf
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	mov	%rbx,%r8		# preserve %rbx across cpuid
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# extended feature flags start out as 0
	cpuid
	mov	%eax,%r11d		# highest standard query level

	# Vendor check #1: "GenuineIntel"
	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# %r9d == 0 means Intel CPU
	jz	.Lintel

	# Vendor check #2: "AuthenticAMD"
	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# %r10d == 0 means AMD CPU
	jnz	.Lintel

	# AMD specific path
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d		# highest extended query level
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# hyper-threading bit set?
	jnc	.Lgeneric
	shr	\$16,%ebx		# %bl = number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores - 1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	test	%r9d,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# hyper-threading bit set?
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	test	%r10d,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0xfffeffff,8(%rdi)	# clear AVX512F, ~(1<<16)
					# note that we don't touch other AVX512
					# extensions, because they can be used
					# with YMM (without opmasking though)
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
Andy Polyakovb2dba9b2007-05-14 21:35:25 +0000205
# OPENSSL_cleanse(ptr, len)
#   Overwrites len bytes starting at ptr with zeros.
#   In:  first argument = ptr, second argument = len
#   Lengths below 15 are cleared byte by byte.  Longer buffers are
#   byte-cleared up to an 8-byte boundary, cleared 8 bytes at a time,
#   and any remainder is finished by the byte loop.
#   Clobbers %rax and both argument registers.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax		# the zero that gets stored
	cmp	\$15,$arg2
	jae	.Lot			# long buffer
	test	$arg2,$arg2
	je	.Lret			# nothing to do
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags set by sub
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1		# 8-byte aligned yet?
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# at least 8 bytes left?
	lea	8($arg1),$arg1
	jnz	.Laligned
	test	$arg2,$arg2
	jne	.Little			# clear the 1..7 byte remainder
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
Andy Polyakove33826f2016-05-15 17:01:15 +0200240
# CRYPTO_memcmp(a, b, len)
#   In:  first and second argument = buffers, third argument = length
#   Out: %rax = 0 when all len bytes are equal (or len is 0), else 1
#   Every byte pair is examined; the loop has no data-dependent exit.
#   Clobbers %r10 and the three argument registers.
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
	xor	%rax,%rax		# accumulated difference bits
	xor	%r10,%r10
	test	$arg3,$arg3
	je	.Lno_data
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b		# non-zero where the bytes differ
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax			# top bit becomes set iff %rax was non-zero
	shr	\$63,%rax		# reduce to 0 or 1
.Lno_data:
	ret
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___
Andy Polyakovaa8f38e2008-11-12 08:15:52 +0000263
# OPENSSL_wipe_cpu zeroes scratch registers and returns %rsp+8 in %rax.
# The Win64 variant clears fewer registers than the Unix one: only
# %xmm0-%xmm5, and neither %rsi nor %rdi.
if ($win64) {
print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
} else {
print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
{
# Register roles shared by both routines below.
my $out="%r10";		# cursor into the 32-bit output array
my $cnt="%rcx";		# remaining slot/sample counter
my $max="%r11";		# return value (bus) or iteration limit (bus2)
my $lasttick="%r8d";	# low 32 bits of the previous rdtsc reading
my $lastdiff="%r9d";	# previous tick difference
# Scratch slot for the original count: 8(%rsp) on Win64, -8(%rsp)
# (red zone) elsewhere.  The sigil matters: a bareword here is always
# true and would make Unix builds write into the caller's frame.
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus(out, cnt)
#   For each of cnt slots: flush the slot from cache, then atomically add
#   the tick difference since the previous sample to the 32-bit slot.
#   In:  first argument = out, second argument = cnt
#   Out: %rax = cnt
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:
	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff = tick - lasttick
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff
	lea	4($out),$out	# next slot; lea keeps flags intact
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2(out, cnt, max)
#   Like the routine above, but only moves to the next slot (and counts
#   it) when the tick difference changes.  Stops after max accumulations
#   or when cnt slots are consumed.
#   In:  first argument = out, second = cnt, third = max
#   Out: %rax = original cnt minus the remaining cnt
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)	# remember the original count

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx	# mov, not xor: flags from cmp must survive
	setne	%dl		# 1 if diff changed, else 0
	sub	%rdx,$cnt	# conditional --cnt
	lea	($out,%rdx,4),$out	# conditional ++out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
Andy Polyakovaa8f38e2008-11-12 08:15:52 +0000408
# gen_random(RDOP) emits two routines built around the instruction named
# by RDOP ("rdrand" or "rdseed"):
#
#   OPENSSL_ia32_RDOP()
#	Out: %rax = value delivered by the instruction.  Up to 8 attempts
#	are made; if %rax ends up 0 it is replaced by the remaining
#	attempt counter in %rcx.
#
#   OPENSSL_ia32_RDOP_bytes(buf, len)
#	In:  first argument = buf, second argument = len
#	Out: %rax = number of bytes stored.  Filling stops early once 8
#	consecutive attempts fail.
sub gen_random {
my $rdop = shift;
print<<___;
.globl	OPENSSL_ia32_${rdop}
.type	OPENSSL_ia32_${rdop},\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}:
	mov	\$8,%ecx		# attempt budget
.Loop_${rdop}:
	${rdop}	%rax
	jc	.Lbreak_${rdop}		# carry set: value delivered
	loop	.Loop_${rdop}
.Lbreak_${rdop}:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# substitute the counter for a zero value
	ret
.size	OPENSSL_ia32_${rdop},.-OPENSSL_ia32_${rdop}

.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11		# attempt budget per 8-byte word
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes	# budget exhausted, give up

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)		# store a full word
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11		# fresh budget for the next word
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)		# store the lowest remaining byte
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10		# expose the next byte of the random word
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	xor	%r10,%r10		# do not leave random data in a scratch register
	ret
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
gen_random("rdrand");
gen_random("rdseed");
Andy Polyakov301799b2011-06-04 12:20:45 +0000471
# STDOUT is the pipe to the translator: closing it flushes the output and
# reports a failed translator run, which must not pass silently.
close STDOUT or die "error closing STDOUT: $!";