x86_64 assembler pack to comply with updated styling x86_64-xlate.pl rules.
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index e19ecdb..c54b9e3 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -1,110 +1,37 @@
 #!/usr/bin/env perl
 
-$output=shift;
-$masm=1 if ($output =~ /\.asm/);
-open STDOUT,">$output" || die "can't open $output: $!";
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
 
-print<<___ if(defined($masm));
-_TEXT	SEGMENT
-PUBLIC	OPENSSL_rdtsc
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
 
-PUBLIC	OPENSSL_atomic_add
-ALIGN	16
-OPENSSL_atomic_add	PROC
-	mov	eax,DWORD PTR[rcx]
-\$Lspin:	lea	r8,DWORD PTR[rdx+rax]
-lock	cmpxchg	DWORD PTR[rcx],r8d
-	jne	\$Lspin
-	mov	eax,r8d
-	cdqe    
-	ret
-OPENSSL_atomic_add	ENDP
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
 
-PUBLIC	OPENSSL_wipe_cpu
-ALIGN	16
-OPENSSL_wipe_cpu	PROC
-	pxor	xmm0,xmm0
-	pxor	xmm1,xmm1
-	pxor	xmm2,xmm2
-	pxor	xmm3,xmm3
-	pxor	xmm4,xmm4
-	pxor	xmm5,xmm5
-	xor	rcx,rcx
-	xor	rdx,rdx
-	xor	r8,r8
-	xor	r9,r9
-	xor	r10,r10
-	xor	r11,r11
-	lea	rax,QWORD PTR[rsp+8]
-	ret
-OPENSSL_wipe_cpu	ENDP
-_TEXT	ENDS
-
-CRT\$XIU	SEGMENT
-EXTRN	OPENSSL_cpuid_setup:PROC
-DQ	OPENSSL_cpuid_setup
-CRT\$XIU	ENDS
-
-___
-print<<___ if(!defined($masm));
-.text
-
-.globl	OPENSSL_atomic_add
-.type	OPENSSL_atomic_add,\@function
-.align	16
-OPENSSL_atomic_add:
-	movl	(%rdi),%eax
-.Lspin:	leaq	(%rsi,%rax),%r8
-lock;	cmpxchgl	%r8d,(%rdi)
-	jne	.Lspin
-	movl	%r8d,%eax
-	.byte	0x48,0x98
-	ret
-.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
-
-.globl	OPENSSL_wipe_cpu
-.type	OPENSSL_wipe_cpu,\@function
-.align	16
-OPENSSL_wipe_cpu:
-	pxor	%xmm0,%xmm0
-	pxor	%xmm1,%xmm1
-	pxor	%xmm2,%xmm2
-	pxor	%xmm3,%xmm3
-	pxor	%xmm4,%xmm4
-	pxor	%xmm5,%xmm5
-	pxor	%xmm6,%xmm6
-	pxor	%xmm7,%xmm7
-	pxor	%xmm8,%xmm8
-	pxor	%xmm9,%xmm9
-	pxor	%xmm10,%xmm10
-	pxor	%xmm11,%xmm11
-	pxor	%xmm12,%xmm12
-	pxor	%xmm13,%xmm13
-	pxor	%xmm14,%xmm14
-	pxor	%xmm15,%xmm15
-	xorq	%rcx,%rcx
-	xorq	%rdx,%rdx
-	xorq	%rsi,%rsi
-	xorq	%rdi,%rdi
-	xorq	%r8,%r8
-	xorq	%r9,%r9
-	xorq	%r10,%r10
-	xorq	%r11,%r11
-	leaq	8(%rsp),%rax
-	ret
-.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
-
+if ($win64)	{ $arg1="%rcx"; $arg2="%rdx"; }
+else		{ $arg1="%rdi"; $arg2="%rsi"; }
+print<<___;
+.extern		OPENSSL_cpuid_setup
 .section	.init
 	call	OPENSSL_cpuid_setup
 
-___
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $output";
-
-print<<___;
 .text
 
+.globl	OPENSSL_atomic_add
+.type	OPENSSL_atomic_add,\@abi-omnipotent
+.align	16
+OPENSSL_atomic_add:
+	movl	($arg1),%eax
+.Lspin:	leaq	($arg2,%rax),%r8
+	.byte	0xf0		# lock
+	cmpxchgl	%r8d,($arg1)
+	jne	.Lspin
+	movl	%r8d,%eax
+	.byte	0x48,0x98	# cltq/cdqe
+	ret
+.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
+
 .globl	OPENSSL_rdtsc
 .type	OPENSSL_rdtsc,\@abi-omnipotent
 .align	16
@@ -159,35 +86,91 @@
 .size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
 
 .globl  OPENSSL_cleanse
-.type   OPENSSL_cleanse,\@function,2
+.type   OPENSSL_cleanse,\@abi-omnipotent
 .align  16
 OPENSSL_cleanse:
 	xor	%rax,%rax
-	cmp	\$15,%rsi
+	cmp	\$15,$arg2
 	jae	.Lot
 .Little:
-	mov	%al,(%rdi)
-	sub	\$1,%rsi
-	lea	1(%rdi),%rdi
+	mov	%al,($arg1)
+	sub	\$1,$arg2
+	lea	1($arg1),$arg1
 	jnz	.Little
 	ret
 .align	16
 .Lot:
-	test	\$7,%rdi
+	test	\$7,$arg1
 	jz	.Laligned
-	mov	%al,(%rdi)
-	lea	-1(%rsi),%rsi
-	lea	1(%rdi),%rdi
+	mov	%al,($arg1)
+	lea	-1($arg2),$arg2
+	lea	1($arg1),$arg1
 	jmp	.Lot
 .Laligned:
-	mov	%rax,(%rdi)
-	lea	-8(%rsi),%rsi
-	test	\$-8,%rsi
-	lea	8(%rdi),%rdi
+	mov	%rax,($arg1)
+	lea	-8($arg2),$arg2
+	test	\$-8,$arg2
+	lea	8($arg1),$arg1
 	jnz	.Laligned
-	cmp	\$0,%rsi
+	cmp	\$0,$arg2
 	jne	.Little
 	ret
 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
 ___
+
+print<<___ if (!$win64);
+.globl	OPENSSL_wipe_cpu
+.type	OPENSSL_wipe_cpu,\@abi-omnipotent
+.align	16
+OPENSSL_wipe_cpu:
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	pxor	%xmm8,%xmm8
+	pxor	%xmm9,%xmm9
+	pxor	%xmm10,%xmm10
+	pxor	%xmm11,%xmm11
+	pxor	%xmm12,%xmm12
+	pxor	%xmm13,%xmm13
+	pxor	%xmm14,%xmm14
+	pxor	%xmm15,%xmm15
+	xorq	%rcx,%rcx
+	xorq	%rdx,%rdx
+	xorq	%rsi,%rsi
+	xorq	%rdi,%rdi
+	xorq	%r8,%r8
+	xorq	%r9,%r9
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	leaq	8(%rsp),%rax
+	ret
+.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
+___
+print<<___ if ($win64);
+.globl	OPENSSL_wipe_cpu
+.type	OPENSSL_wipe_cpu,\@abi-omnipotent
+.align	16
+OPENSSL_wipe_cpu:
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	xorq	%rcx,%rcx
+	xorq	%rdx,%rdx
+	xorq	%r8,%r8
+	xorq	%r9,%r9
+	xorq	%r10,%r10
+	xorq	%r11,%r11
+	leaq	8(%rsp),%rax
+	ret
+.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
+___
+
 close STDOUT;	# flush