[atomic] Fix Solaris atomic ops to add proper barriers
diff --git a/src/hb-atomic-private.hh b/src/hb-atomic-private.hh
index 0e7a841..0b043e6 100644
--- a/src/hb-atomic-private.hh
+++ b/src/hb-atomic-private.hh
@@ -62,7 +62,7 @@
   const void *O = O_; // Need lvalue
   return __atomic_compare_exchange_n ((void **) P, (void **) &O, (void *) N, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
 }
-#define hb_atomic_ptr_impl_cmpexch(P,O,N)	(_hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N)))
+#define hb_atomic_ptr_impl_cmpexch(P,O,N)	_hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N)) /* Casts P and forwards to _hb_atomic_ptr_impl_cmplexch; the __atomic_compare_exchange_n call just above passes weak=true, so a false result may be spurious. */
 
 #elif !defined(HB_NO_MT) && __cplusplus >= 201103L
 
@@ -82,7 +82,7 @@
   const void *O = O_; // Need lvalue
   return reinterpret_cast<std::atomic<const void*> *> (P)->compare_exchange_weak (O, N, std::memory_order_acq_rel, std::memory_order_relaxed);
 }
-#define hb_atomic_ptr_impl_cmpexch(P,O,N)	(_hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N)))
+#define hb_atomic_ptr_impl_cmpexch(P,O,N)	_hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N)) /* Casts P and forwards to _hb_atomic_ptr_impl_cmplexch; the compare_exchange_weak call just above may fail spuriously. */
 
 
 #elif !defined(HB_NO_MT) && (defined(_WIN32) || defined(__CYGWIN__))
@@ -127,9 +127,25 @@
 #define _hb_memory_barrier()			__machine_rw_barrier ()
 
 typedef unsigned int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		( ({__machine_rw_barrier ();}), atomic_add_int_nv ((AI), (V)) - (V) /* XXX barrier again? */)
 
-#define hb_atomic_ptr_impl_cmpexch(P,O,N)	( ({__machine_rw_barrier ();}), atomic_cas_ptr ((void **) (P), (void *) (O), (void *) (N)) == (void *) (O) ? true : false /* XXX barrier again? */)
+static inline int _hb_fetch_and_add (hb_atomic_int_impl_t *AI, int V)
+{ /* Adds V to *AI and returns the value *AI held BEFORE the addition. */
+  _hb_memory_w_barrier ();
+  int result = atomic_add_int_nv (AI, V) - V; /* _nv returns the NEW value; subtract V to recover the old one, as the macro this replaces did. */
+  _hb_memory_r_barrier ();
+  return result;
+}
+static inline bool _hb_compare_and_swap_ptr (const void **P, const void *O, const void *N)
+{ /* Asks atomic_cas_ptr() to replace *P with N when *P is O; true iff that call handed back O. */
+  _hb_memory_w_barrier ();
+  const bool swapped = (void *) O == atomic_cas_ptr ((void **) P, (void *) O, (void *) N);
+  _hb_memory_r_barrier ();
+  return swapped;
+}
+
+#define hb_atomic_int_impl_add(AI, V)           _hb_fetch_and_add ((AI), (V)) /* Integer add: forwards to the _hb_fetch_and_add helper. */
+
+#define hb_atomic_ptr_impl_cmpexch(P,O,N)       _hb_compare_and_swap_ptr ((const void **) (P), (O), (N)) /* Pointer compare-exchange: casts P to (const void **) and forwards to _hb_compare_and_swap_ptr. */
 
 
 #elif !defined(HB_NO_MT) && defined(__APPLE__)
@@ -161,25 +177,29 @@
 
 #include <builtins.h>
 
-static inline int _hb_fetch_and_add (int* AI, unsigned int V) {
-  __lwsync();
-  int result = __fetch_and_add(AI, V);
-  __lwsync();
-  return result;
-}
-static inline int _hb_compare_and_swaplp (long* P, long O, long N) {
-  __lwsync();
-  int result = __compare_and_swaplp (P, &O, N);
-  __lwsync();
-  return result;
-}
-
 #define _hb_memory_barrier()			__lwsync ()
 
 typedef int hb_atomic_int_impl_t;
+
+static inline int _hb_fetch_and_add (hb_atomic_int_impl_t *AI, int V)
+{ /* Runs __fetch_and_add (AI, V) between two _hb_memory_barrier () calls and returns its result. */
+  _hb_memory_barrier ();
+  const int fetched = __fetch_and_add (AI, V);
+  _hb_memory_barrier ();
+  return fetched;
+}
+static inline bool _hb_compare_and_swaplp (long *P, long O, long N)
+{ /* Runs __compare_and_swaplp between two _hb_memory_barrier () calls and returns its result as bool. */
+  _hb_memory_barrier ();
+  const bool swapped = __compare_and_swaplp (P, &O, N); /* O is a by-value copy, so &O gives the builtin an lvalue without exposing the caller's variable. */
+  _hb_memory_barrier ();
+  return swapped;
+}
+
 #define hb_atomic_int_impl_add(AI, V)           _hb_fetch_and_add ((AI), (V))
 
-#define hb_atomic_ptr_impl_cmpexch(P,O,N)       _hb_compare_and_swaplp ((long*)(P), (long)(O), (long)(N))
+#define hb_atomic_ptr_impl_cmpexch(P,O,N)       _hb_compare_and_swaplp ((long *) (P), (long) (O), (long) (N)) /* Pointer compare-exchange: reinterprets P as long * and passes O and N as long. */
+static_assert ((sizeof (long) == sizeof (void *)), ""); /* The casts in the macro above are only sound when long and void * have the same size. */
 
 
 #elif !defined(HB_NO_MT)