Update cpuid modules: add atomic add, CPU state wiping and related helpers for IA-64, SPARC and x86.
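
The IA-64 file gains OPENSSL_atomic_add and OPENSSL_wipe_cpu (plus tidier
bundling for OPENSSL_rdtsc), sparccpuid.S is introduced with
OPENSSL_wipe_cpu, OPENSSL_atomic_add and a first cut of OPENSSL_rdtsc,
and x86cpuid.pl gains OPENSSL_far_spin, OPENSSL_wipe_cpu and
OPENSSL_atomic_add.

As a reading aid, the C-level contracts these routines appear to satisfy
are sketched below. The authoritative declarations live in the C headers,
which are not part of this diff, so the exact types are assumptions:

	/* free-running tick/cycle counter */
	unsigned long OPENSSL_rdtsc(void);
	/* atomically performs *p += n and returns the NEW value */
	int OPENSSL_atomic_add(int *p, int n);
	/* clears register banks; returns a pointer to the caller's top of
	 * stack (on IA-64 a second pointer, past the register backing
	 * store, comes back as well) */
	char *OPENSSL_wipe_cpu(void);
	/* x86 only: counts spins until the dword at seg:off changes */
	unsigned int OPENSSL_far_spin(unsigned int seg, unsigned int off);
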
diff --git a/crypto/ia64cpuid.S b/crypto/ia64cpuid.S
index a800527..04fbb34 100644
--- a/crypto/ia64cpuid.S
+++ b/crypto/ia64cpuid.S
@@ -4,6 +4,118 @@
 .global	OPENSSL_rdtsc#
 .proc	OPENSSL_rdtsc#
 OPENSSL_rdtsc:
-	mov	r8=ar.itc
-	br.ret	b0
+{ .mib;	mov			r8=ar.itc
+	br.ret.sptk.many	b0		};;
 .endp   OPENSSL_rdtsc#
+
+.global	OPENSSL_atomic_add#
+.proc	OPENSSL_atomic_add#
+.align	32
+OPENSSL_atomic_add:
+{ .mii;	ld4		r2=[r32]
+	nop.i		0
+	nop.i		0		};;
+.Lspin:
+{ .mii;	mov		ar.ccv=r2
+	add		r8=r2,r33
+	mov		r3=r2		};;
+{ .mmi;	mf
+	cmpxchg4.acq	r2=[r32],r8,ar.ccv
+	nop.i		0		};;
+{ .mib;	cmp.ne		p6,p0=r2,r3
+	nop.i		0
+(p6)	br.dpnt		.Lspin		};;
+{ .mib;	nop.m		0
+	sxt4		r8=r8
+	br.ret.sptk.many	b0	};;
+.endp	OPENSSL_atomic_add#
+
+// Returns a structure comprising a pointer to the top of the
+// caller's stack and a pointer just beyond the backing storage of
+// the current register frame. The latter is required because wiping
+// the backing storage of the current frame (as this procedure does)
+// might not be enough: one might have to go further, toward higher
+// addresses, to reach the whole "retroactively" saved
+// context...
+.global	OPENSSL_wipe_cpu#
+.proc	OPENSSL_wipe_cpu#
+.align	32
+OPENSSL_wipe_cpu:
+	.prologue
+	.fframe	0
+	.save	ar.pfs,r2
+	.save	ar.lc,r3
+{ .mib;	alloc		r2=ar.pfs,0,96,0,96
+	mov		r3=ar.lc
+	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
+					};;
+{ .mii;	mov		r9=ar.bsp
+	mov		r8=pr
+	mov		ar.lc=96	};;
+	.body
+{ .mii;	add		r9=96*8-8,r9
+	mov		ar.ec=1		};;
+
+// One could sweep twice as fast, but then we couldn't guarantee
+// that the backing storage is wiped...
+.L_wipe_top:
+{ .mfi;	st8		[r9]=r0,-8
+	mov		f127=f0
+	mov		r127=r0		}
+{ .mfb;	nop.m		0
+	nop.f		0
+	br.ctop.sptk	.L_wipe_top	};;
+.L_wipe_end:
+
+{ .mfi;	mov		r11=r0
+	mov		f6=f0
+	mov		r14=r0		}
+{ .mfi;	mov		r15=r0
+	mov		f7=f0
+	mov		r16=r0		}
+{ .mfi;	mov		r17=r0
+	mov		f8=f0
+	mov		r18=r0		}
+{ .mfi;	mov		r19=r0
+	mov		f9=f0
+	mov		r20=r0		}
+{ .mfi;	mov		r21=r0
+	mov		f10=f0
+	mov		r22=r0		}
+{ .mfi;	mov		r23=r0
+	mov		f11=f0
+	mov		r24=r0		}
+{ .mfi;	mov		r25=r0
+	mov		f12=f0
+	mov		r26=r0		}
+{ .mfi;	mov		r27=r0
+	mov		f13=f0
+	mov		r28=r0		}
+{ .mfi;	mov		r29=r0
+	mov		f14=f0
+	mov		r30=r0		}
+{ .mfi;	mov		r31=r0
+	mov		f15=f0
+	nop.i		0		}
+{ .mfi;	mov		f16=f0		}
+{ .mfi;	mov		f17=f0		}
+{ .mfi;	mov		f18=f0		}
+{ .mfi;	mov		f19=f0		}
+{ .mfi;	mov		f20=f0		}
+{ .mfi;	mov		f21=f0		}
+{ .mfi;	mov		f22=f0		}
+{ .mfi;	mov		f23=f0		}
+{ .mfi;	mov		f24=f0		}
+{ .mfi;	mov		f25=f0		}
+{ .mfi;	mov		f26=f0		}
+{ .mfi;	mov		f27=f0		}
+{ .mfi;	mov		f28=f0		}
+{ .mfi;	mov		f29=f0		}
+{ .mfi;	mov		f30=f0		}
+{ .mfi;	add		r9=96*8+8,r9
+	mov		f31=f0
+	mov		pr=r8,0x1ffff	}
+{ .mib;	mov		r8=sp
+	mov		ar.lc=r3
+	br.ret.sptk	b0		};;
+.endp	OPENSSL_wipe_cpu#
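
The comment above OPENSSL_wipe_cpu spells out what comes back: a pointer
to the top of the caller's stack (and, on IA-64, a second pointer just
past the register backing store, so a caller can sweep beyond the current
frame). A purely hypothetical caller-side sketch of how that return value
could be used to scrub dead stack after a sensitive computation follows;
do_sensitive(), WIPE_DEPTH and the single-pointer prototype are
illustrative assumptions, not part of this patch:

	char *OPENSSL_wipe_cpu(void);	/* assumed single-pointer prototype  */
	extern void do_sensitive(void);	/* hypothetical secret-handling call */

	#define WIPE_DEPTH 8192		/* assumed bound on do_sensitive()'s
					 * stack usage */

	static void call_and_scrub(void)
	{
		volatile char *p;
		char *top;

		do_sensitive();		/* may leave secrets in registers and
					 * in its now-dead stack frames */
		top = OPENSSL_wipe_cpu();	/* registers cleared; top is
						 * roughly our stack pointer */

		/* The stack grows toward lower addresses, so the dead frames
		 * of do_sensitive() sit below top. Wipe them with an explicit
		 * loop rather than memset(), so no callee pushes a return
		 * address into the region being cleared. */
		for (p = top - WIPE_DEPTH; p < top; p++)
			*p = 0;
	}
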
diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S
new file mode 100644
index 0000000..c17350f
--- /dev/null
+++ b/crypto/sparccpuid.S
@@ -0,0 +1,239 @@
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+# define ABI64  /* They've said -xarch=v9 on the command line */
+#elif defined(__GNUC__) && defined(__arch64__)
+# define ABI64  /* They've said -m64 on the command line */
+#endif
+
+#ifdef ABI64
+  .register	%g2,#scratch
+  .register	%g3,#scratch
+# define	FRAME	-192
+# define	BIAS	2047
+#else
+# define	FRAME	-96
+# define	BIAS	0
+#endif
+
+.text
+.align	32
+.global	OPENSSL_wipe_cpu
+.type	OPENSSL_wipe_cpu,#function
+! Keep in mind that this does not excuse us from wiping the stack!
+! This routine wipes registers, but not their backing store [which
+! resides on the stack, toward lower addresses]. To facilitate stack
+! wiping, it returns a pointer to the top of the *caller's* stack.
+OPENSSL_wipe_cpu:
+	save	%sp,FRAME,%sp
+	nop
+#ifdef __sun
+#include <sys/trap.h>
+	ta	ST_CLEAN_WINDOWS
+#else
+	call	.walk.reg.wins
+#endif
+	nop
+	call	.PIC.zero.up
+	mov	.zero-(.-4),%o0
+	ldd	[%o0],%f0
+
+	subcc	%g0,1,%o0
+	! The following is the V9 "rd %ccr,%o0" instruction. The V8
+	! specification, however, says that it ("rd %asr2,%o0" in V8
+	! terms) does not cause an illegal_instruction trap. It can
+	! therefore be used to tell whether the CPU we are executing on
+	! is V8- or V9-compliant: a V9 CPU returns the distinct value
+	! 0x99, i.e. "negative" and "borrow" set in both %icc and %xcc.
+	.word	0x91408000	!rd	%ccr,%o0
+	cmp	%o0,0x99
+	bne	.v8
+	nop
+			! Even though we do not use the upper FP register bank
+			! ourselves, wipe it too: memcpy might have used it...
+			.word	0xbfa00040	!fmovd	%f0,%f62
+			.word	0xbba00040	!...
+			.word	0xb7a00040
+			.word	0xb3a00040
+			.word	0xafa00040
+			.word	0xaba00040
+			.word	0xa7a00040
+			.word	0xa3a00040
+			.word	0x9fa00040
+			.word	0x9ba00040
+			.word	0x97a00040
+			.word	0x93a00040
+			.word	0x8fa00040
+			.word	0x8ba00040
+			.word	0x87a00040
+			.word	0x83a00040	!fmovd	%f0,%f32
+.v8:			fmovs	%f1,%f31
+	clr	%o0
+			fmovs	%f0,%f30
+	clr	%o1
+			fmovs	%f1,%f29
+	clr	%o2
+			fmovs	%f0,%f28
+	clr	%o3
+			fmovs	%f1,%f27
+	clr	%o4
+			fmovs	%f0,%f26
+	clr	%o5
+			fmovs	%f1,%f25
+	clr	%o7
+			fmovs	%f0,%f24
+	clr	%l0
+			fmovs	%f1,%f23
+	clr	%l1
+			fmovs	%f0,%f22
+	clr	%l2
+			fmovs	%f1,%f21
+	clr	%l3
+			fmovs	%f0,%f20
+	clr	%l4
+			fmovs	%f1,%f19
+	clr	%l5
+			fmovs	%f0,%f18
+	clr	%l6
+			fmovs	%f1,%f17
+	clr	%l7
+			fmovs	%f0,%f16
+	clr	%i0
+			fmovs	%f1,%f15
+	clr	%i1
+			fmovs	%f0,%f14
+	clr	%i2
+			fmovs	%f1,%f13
+	clr	%i3
+			fmovs	%f0,%f12
+	clr	%i4
+			fmovs	%f1,%f11
+	clr	%i5
+			fmovs	%f0,%f10
+	clr	%g1
+			fmovs	%f1,%f9
+	clr	%g2
+			fmovs	%f0,%f8
+	clr	%g3
+			fmovs	%f1,%f7
+	clr	%g4
+			fmovs	%f0,%f6
+	clr	%g5
+			fmovs	%f1,%f5
+			fmovs	%f0,%f4
+			fmovs	%f1,%f3
+			fmovs	%f0,%f2
+
+	add	%fp,BIAS,%i0	! return pointer to caller's top of stack
+
+	ret
+	restore
+
+.zero:	.long	0x0,0x0
+.PIC.zero.up:
+	retl
+	add	%o0,%o7,%o0
+#ifdef DEBUG
+.global	walk_reg_wins
+.type	walk_reg_wins,#function
+walk_reg_wins:
+#endif
+.walk.reg.wins:
+	save	%sp,FRAME,%sp
+	cmp	%i7,%o7
+	be	2f
+	clr	%o0
+	cmp	%o7,0	! compiler never cleans %o7...
+	be	1f	! could have been a leaf function...
+	clr	%o1
+	call	.walk.reg.wins
+	nop
+1:	clr	%o2
+	clr	%o3
+	clr	%o4
+	clr	%o5
+	clr	%o7
+	clr	%l0
+	clr	%l1
+	clr	%l2
+	clr	%l3
+	clr	%l4
+	clr	%l5
+	clr	%l6
+	clr	%l7
+	add	%o0,1,%i0	! used for debugging
+2:	ret
+	restore
+.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
+
+.global	OPENSSL_atomic_add
+.type	OPENSSL_atomic_add,#function
+OPENSSL_atomic_add:
+#ifndef ABI64
+	subcc	%g0,1,%o2
+	.word	0x95408000	!rd	%ccr,%o2, see comment above
+	cmp	%o2,0x99
+	be	.v9
+	nop
+	save	%sp,FRAME,%sp
+	ba	.enter
+	nop
+#ifdef __sun
+! Note that you don't have to link with libthread to call thr_yield,
+! as libc provides a stub, which is overridden the moment you link
+! with *either* libpthread or libthread...
+#define	YIELD_CPU	thr_yield
+#else
+! applies at least to Linux and FreeBSD... Feedback expected...
+#define	YIELD_CPU	sched_yield
+#endif
+.spin:	call	YIELD_CPU
+	nop
+.enter:	ld	[%i0],%i2
+	cmp	%i2,-4096
+	be	.spin
+	mov	-1,%i2
+	swap	[%i0],%i2
+	cmp	%i2,-1
+	be	.spin
+	add	%i2,%i1,%i2
+	stbar
+	st	%i2,[%i0]
+	sra	%i2,%g0,%i0
+	ret
+	restore
+.v9:
+#endif
+	ld	[%o0],%o2
+1:	add	%o1,%o2,%o3
+	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
+	cmp	%o2,%o3
+	bne	1b
+	mov	%o3,%o2		! cas always fetches into the dest. register
+	add	%o1,%o2,%o0	! OpenSSL expects the new value
+	retl
+	sra	%o0,%g0,%o0	! we return signed int, remember?
+.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
+
+.global	OPENSSL_rdtsc
+OPENSSL_rdtsc:	subcc	%g0,1,%o0
+	.word	0x91408000	!rd	%ccr,%o0
+	cmp	%o0,0x99
+	bne	.notsc
+	xor	%o0,%o0,%o0
+	save	%sp,FRAME-16,%sp
+	mov	513,%o0		!SI_PLATFORM
+	add	%sp,BIAS+16,%o1
+	call	sysinfo
+	mov	256,%o2
+
+	add	%sp,BIAS+16,%o1	! same buffer sysinfo just filled
+	ld	[%o1],%l0
+	ld	[%o1+4],%l1
+	ld	[%o1+8],%l2
+	mov	%lo('SUNW'),%l3
+	ret
+	restore
+.notsc:
+	retl
+	nop
+.type	OPENSSL_rdtsc,#function
+.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
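
On pre-V9 hardware the OPENSSL_atomic_add above has no compare-and-swap,
so it presses the value -1 into service as a "busy" marker: it exchanges
-1 into the location with swap, and if -1 comes back somebody else is
mid-update, so it yields and retries; otherwise it adds, publishes the
result after stbar, and returns the new value. A rough C model of that
protocol follows, strictly as a reading aid: the __atomic_* builtins are
GCC-style stand-ins (an assumption) for the swap instruction and the
stbar/st pair, and the assembly additionally pre-checks the loaded value
before attempting the exchange.

	#include <sched.h>	/* sched_yield(), cf. YIELD_CPU above */

	/* Sketch of the swap-based fallback: -1 doubles as "locked". */
	static int atomic_add_v8_model(int *p, int amount)
	{
		int old, new;

		for (;;) {
			/* swap [%i0],%i2 with %i2 = -1: fetch the current
			 * value and leave the busy marker behind */
			old = __atomic_exchange_n(p, -1, __ATOMIC_ACQUIRE);
			if (old == -1) {	/* somebody holds the slot */
				sched_yield();	/* be polite, then retry */
				continue;
			}
			new = old + amount;
			/* stbar + st: publish the result, releasing the slot */
			__atomic_store_n(p, new, __ATOMIC_RELEASE);
			return new;	/* OpenSSL expects the new value */
		}
	}

The scheme of course presumes the counter never legitimately holds -1.
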
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index 894c49c..9ad9435 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -72,6 +72,84 @@
 	&ret	();
 &function_end_B("OPENSSL_instrument_halt");
 
+# Essentially there is only one use for this function. Under DJGPP:
+#
+#	#include <go32.h>
+#	...
+#	i=OPENSSL_far_spin(_dos_ds,0x46c);
+#	...
+# to obtain the number of spins until the next timer interrupt.
+
+&function_begin_B("OPENSSL_far_spin");
+	&pushf	();
+	&pop	("eax");
+	&bt	("eax",9);
+	&jnc	(&label("nospin"));	# interrupts are disabled
+
+	&mov	("eax",&DWP(4,"esp"));
+	&mov	("ecx",&DWP(8,"esp"));
+	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
+	&xor	("eax","eax");
+	&mov	("edx",&DWP(0,"ecx"));
+	&jmp	(&label("spin"));
+
+	&align	(16);
+&set_label("spin");
+	&inc	("eax");
+	&cmp	("edx",&DWP(0,"ecx"));
+	&je	(&label("spin"));
+
+	&data_word (0x1f909090);	# pop	%ds
+	&ret	();
+
+&set_label("nospin");
+	&xor	("eax","eax");
+	&xor	("edx","edx");
+	&ret	();
+&function_end_B("OPENSSL_far_spin");
+
+&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
+	&xor	("eax","eax");
+	&xor	("edx","edx");
+	&picmeup("ecx","OPENSSL_ia32cap_P");
+	&mov	("ecx",&DWP(0,"ecx"));
+	&bt	("ecx",1);
+	&jnc	(&label("no_x87"));
+	&bt	("ecx",26);		# SSE2 bit
+	&jnc	(&label("no_sse2"));
+	&pxor	("xmm0","xmm0");
+	&pxor	("xmm1","xmm1");
+	&pxor	("xmm2","xmm2");
+	&pxor	("xmm3","xmm3");
+	&pxor	("xmm4","xmm4");
+	&pxor	("xmm5","xmm5");
+	&pxor	("xmm6","xmm6");
+	&pxor	("xmm7","xmm7");
+&set_label("no_sse2");
+	# just a bunch of fldz to zap the fp/mm bank...
+	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9);
+	&emms	();
+&set_label("no_x87");
+	&lea	("eax",&DWP(4,"esp"));
+	&ret	();
+&function_end_B("OPENSSL_wipe_cpu");
+
+&function_begin_B("OPENSSL_atomic_add");
+	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
+	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
+	&push	("ebx");
+	&nop	();
+	&mov	("eax",&DWP(0,"edx"));
+&set_label("spin");
+	&lea	("ebx",&DWP(0,"eax","ecx"));
+	&nop	();
+	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
+	&jne	(&label("spin"));
+	&mov	("eax","ebx");	# OpenSSL expects the new value
+	&pop	("ebx");
+	&ret	();
+&function_end_B("OPENSSL_atomic_add");
+
 &initseg("OPENSSL_cpuid_setup");
 
 &asm_finish();
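
The x86 OPENSSL_atomic_add above has the classic lock/cmpxchg shape:
propose old+amount, let cmpxchg either install it or reload %eax with
what is really in memory, and loop until it sticks, then return the
freshly written value. A hedged C rendering of that loop, using GCC-style
__atomic builtins purely as stand-ins (an assumption; the perl above is
what actually gets emitted):

	/* Reference model of the cmpxchg spin (sketch only). */
	static int atomic_add_model(int *p, int amount)
	{
		int seen = *p;			/* mov (%edx),%eax */
		int want;

		do {
			want = seen + amount;	/* lea (%eax,%ecx,1),%ebx */
			/* lock cmpxchg %ebx,(%edx): on failure, seen is
			 * refreshed with the current memory value, exactly
			 * as %eax is */
		} while (!__atomic_compare_exchange_n(p, &seen, want, 0,
						__ATOMIC_SEQ_CST,
						__ATOMIC_SEQ_CST));

		return want;		/* OpenSSL expects the new value */
	}

A caller would then do something along the lines of
new_count = OPENSSL_atomic_add(&refcount, 1); (hypothetical names).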