[atomic/cache] Add hb_atomic_short_t
diff --git a/src/hb-atomic.hh b/src/hb-atomic.hh
index 14c6fb3..a474930 100644
--- a/src/hb-atomic.hh
+++ b/src/hb-atomic.hh
@@ -84,11 +84,11 @@
 #define _hb_memory_r_barrier()			std::atomic_thread_fence(std::memory_order_acquire)
 #define _hb_memory_w_barrier()			std::atomic_thread_fence(std::memory_order_release)
 
-#define hb_atomic_int_impl_add(AI, V)		(reinterpret_cast<std::atomic<int> *> (AI)->fetch_add ((V), std::memory_order_acq_rel))
-#define hb_atomic_int_impl_set_relaxed(AI, V)	(reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_relaxed))
-#define hb_atomic_int_impl_set(AI, V)		(reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_release))
-#define hb_atomic_int_impl_get_relaxed(AI)	(reinterpret_cast<std::atomic<int> const *> (AI)->load (std::memory_order_relaxed))
-#define hb_atomic_int_impl_get(AI)		(reinterpret_cast<std::atomic<int> const *> (AI)->load (std::memory_order_acquire))
+#define hb_atomic_int_impl_add(AI, V)		(reinterpret_cast<std::atomic<std::decay<decltype (*(AI))>::type> *> (AI)->fetch_add ((V), std::memory_order_acq_rel))
+#define hb_atomic_int_impl_set_relaxed(AI, V)	(reinterpret_cast<std::atomic<std::decay<decltype (*(AI))>::type> *> (AI)->store ((V), std::memory_order_relaxed))
+#define hb_atomic_int_impl_set(AI, V)		(reinterpret_cast<std::atomic<std::decay<decltype (*(AI))>::type> *> (AI)->store ((V), std::memory_order_release))
+#define hb_atomic_int_impl_get_relaxed(AI)	(reinterpret_cast<std::atomic<std::decay<decltype (*(AI))>::type> const *> (AI)->load (std::memory_order_relaxed))
+#define hb_atomic_int_impl_get(AI)		(reinterpret_cast<std::atomic<std::decay<decltype (*(AI))>::type> const *> (AI)->load (std::memory_order_acquire))
 
 #define hb_atomic_ptr_impl_set_relaxed(P, V)	(reinterpret_cast<std::atomic<void*> *> (P)->store ((V), std::memory_order_relaxed))
 #define hb_atomic_ptr_impl_get_relaxed(P)	(reinterpret_cast<std::atomic<void*> const *> (P)->load (std::memory_order_relaxed))
@@ -145,15 +145,35 @@
 #endif
 #ifndef hb_atomic_int_impl_set
 inline void hb_atomic_int_impl_set (int *AI, int v)	{ _hb_memory_w_barrier (); *AI = v; }
+inline void hb_atomic_int_impl_set (short *AI, short v)	{ _hb_memory_w_barrier (); *AI = v; }
 #endif
 #ifndef hb_atomic_int_impl_get
 inline int hb_atomic_int_impl_get (const int *AI)	{ int v = *AI; _hb_memory_r_barrier (); return v; }
+inline short hb_atomic_int_impl_get (const short *AI)	{ short v = *AI; _hb_memory_r_barrier (); return v; }
 #endif
 #ifndef hb_atomic_ptr_impl_get
 inline void *hb_atomic_ptr_impl_get (void ** const P)	{ void *v = *P; _hb_memory_r_barrier (); return v; }
 #endif
 
 
+struct hb_atomic_short_t
+{
+  hb_atomic_short_t () = default;
+  constexpr hb_atomic_short_t (short v) : v (v) {}
+
+  hb_atomic_short_t& operator = (short v_) { set_relaxed (v_); return *this; }
+  operator short () const { return get_relaxed (); }
+
+  void set_relaxed (short v_) { hb_atomic_int_impl_set_relaxed (&v, v_); }
+  void set_release (short v_) { hb_atomic_int_impl_set (&v, v_); }
+  short get_relaxed () const { return hb_atomic_int_impl_get_relaxed (&v); }
+  short get_acquire () const { return hb_atomic_int_impl_get (&v); }
+  short inc () { return hb_atomic_int_impl_add (&v,  1); }
+  short dec () { return hb_atomic_int_impl_add (&v, -1); }
+
+  short v = 0;
+};
+
 struct hb_atomic_int_t
 {
   hb_atomic_int_t () = default;
diff --git a/src/hb-cache.hh b/src/hb-cache.hh
index 1952286..2e98187 100644
--- a/src/hb-cache.hh
+++ b/src/hb-cache.hh
@@ -39,7 +39,9 @@
 struct hb_cache_t
 {
   using item_t = typename std::conditional<thread_safe,
-					   hb_atomic_int_t,
+					   typename std::conditional<key_bits + value_bits - cache_bits <= 16,
+								     hb_atomic_short_t,
+								     hb_atomic_int_t>::type,
 					   typename std::conditional<key_bits + value_bits - cache_bits <= 16,
 								     short,
 								     int>::type