blob: 40d42135bbc8747891b0794b9ccfe07a47c66652 [file] [log] [blame]
#!/usr/bin/env perl

# Command line: [flavour] output
# A single argument that contains a dot is taken to be the output file
# name, in which case no flavour is passed on to the translator.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected by flavour or by a .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the translator relative to this script; fall back to the current
# directory when the script was invoked without a directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Everything printed below is piped through the perlasm translator.
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"
	or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!";

# Registers carrying the first four integer arguments.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Module prologue: place a call to OPENSSL_cpuid_setup in the .init section,
# reserve the 8-byte common symbol OPENSSL_ia32cap_P, and switch to .text
# for the routines that follow.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8

.text

# OPENSSL_atomic_add: atomically add the 32-bit amount passed as second
# argument to the 32-bit word addressed by the first argument.
# Returns the updated value, sign-extended to 64 bits.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# current value, expected by cmpxchg
.Lspin:	leaq	($arg2,%rax),%r8	# candidate new value
	.byte	0xf0			# lock
	cmpxchgl	%r8d,($arg1)	# on mismatch %eax is reloaded from memory
	jne	.Lspin			# somebody else got in first, retry
	movl	%r8d,%eax
	.byte	0x48,0x98		# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

# OPENSSL_rdtsc: return the 64-bit time-stamp counter in %rax.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc				# counter halves arrive in %edx:%eax
	shl	\$32,%rdx
	or	%rdx,%rax		# glue the halves together
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid: probe the processor and return a 64-bit capability
# vector in %rax. The low 32 bits are the CPUID leaf 1 %edx feature flags
# and the high 32 bits are the leaf 1 %ecx feature flags, adjusted below:
#   - bit 20 and bit 30 of the low half are set for Intel CPUs as hints
#     (see the inline comments),
#   - the hyper-threading bit (28, low half) is cleared when the core/cache
#     topology shows the flag to be meaningless,
#   - bit 11 of the high half is replaced with the AMD XOP flag,
#   - AVX, FMA and XOP bits of the high half are cleared unless the OS has
#     enabled XMM and YMM state in XCR0.
# Takes no arguments; %rbx is preserved through %r8.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x00100000,%edx	# use reserved 20th bit to engage RC4_CHAR
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	je	.Lnotintel
	or	\$0x40000000,%edx	# use reserved bit to skip unrolled loop
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%r9d,%ecx		# merge AMD XOP flag

	shl	\$32,%rcx
	mov	%edx,%ebx
	or	%rcx,%rbx		# compose capability vector in %rbx
	bt	\$27+32,%rcx		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	shl	\$32,%rax		# mask applies to the high half only
	mov	\$0xffffffff,%ecx	# %rcx is scratch by now
	or	%rcx,%rax		# all-ones low half keeps the %edx flags
	and	%rax,%rbx		# clear AVX, FMA and AMD XOP bits
.Ldone:
	mov	%rbx,%rax
	mov	%r8,%rbx		# restore %rbx
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# OPENSSL_cleanse: store zeros over the buffer addressed by the first
# argument, the second argument being its length in bytes.
# Buffers shorter than 15 bytes are cleared byte by byte. Longer ones are
# first advanced byte-wise to an 8-byte boundary (at most 7 bytes, so at
# least 8 remain), then cleared 8 bytes at a time, and any tail is handed
# back to the byte loop.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax		# the zero to be stored
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret			# nothing to do for empty buffer
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea leaves the flags of sub intact
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1		# 8-byte aligned yet?
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# at least 8 bytes left?
	lea	8($arg1),$arg1		# flags of test survive the lea
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little			# clear the remaining tail bytes
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

{
# OPENSSL_wipe_cpu: zero the registers listed below and return the address
# just above the return address (8 bytes past %rsp) in %rax.
# The Win64 variant leaves %xmm6-%xmm15, %rsi and %rdi alone; those are
# non-volatile under the Microsoft x64 calling convention.
my @xmm_to_wipe = $win64 ? (0..5) : (0..15);
my @gpr_to_wipe = $win64 ? qw(rcx rdx r8 r9 r10 r11)
			 : qw(rcx rdx rsi rdi r8 r9 r10 r11);

print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
___
print "\tpxor\t%xmm$_,%xmm$_\n"	foreach (@xmm_to_wipe);
print "\txorq\t%$_,%$_\n"	foreach (@gpr_to_wipe);
print<<___;
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
{
# Register roles shared by both bus instrumentation routines.
my $out="%r10";		# cursor into the array of 32-bit accumulators
my $cnt="%rcx";		# remaining element count
my $max="%r11";		# element count (bus) or probe limit (bus2)
my $lasttick="%r8d";	# low 32 bits of the previous rdtsc reading
my $lastdiff="%r9d";	# previous tick difference
# Scratch slot relative to %rsp used by OPENSSL_instrument_bus2: the
# caller-provided home space above the return address on Win64, the
# red zone below %rsp otherwise.
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus: first argument addresses an array of 32-bit
# accumulators, second argument is the number of elements (must be
# non-zero, as the loop counts down to zero). For every element the
# cache line is flushed and the tick difference since the previous probe
# is added to it with a locked add. Returns the element count.
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2: first argument addresses an array of 32-bit
# accumulators, second argument is the number of elements, third argument
# limits the number of probes. Tick differences are accumulated into the
# current element; the cursor advances and the element count drops only
# when a difference deviates from the previous one. Returns the number of
# elements consumed (initial count minus remaining count).
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# Closing the pipe flushes the generated code and waits for the translator;
# a false return means a write error or a non-zero translator exit status.
close STDOUT
	or die "error closing STDOUT: ".($! || "translator exit status $?");