[atomic] Add get_relaxed / set_relaxed

To help TSan and be more "correct": under the C++11 memory model, plain loads and stores of an integer that another thread modifies concurrently are data races, whereas relaxed atomic accesses are race-free and impose no memory ordering.
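
For context (not part of the patch), a minimal sketch of the distinction using
std::atomic; the names `flag`, `writer`, and `reader` are illustrative only:

    #include <atomic>

    std::atomic<int> flag {0};

    /* Race-free under TSan; no fence or ordering is implied. */
    void writer () { flag.store (1, std::memory_order_relaxed); }
    int  reader () { return flag.load (std::memory_order_relaxed); }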
diff --git a/src/hb-atomic-private.hh b/src/hb-atomic-private.hh
index e6a3a9a..bad409b 100644
--- a/src/hb-atomic-private.hh
+++ b/src/hb-atomic-private.hh
@@ -51,7 +51,9 @@
 /* C++11-style GCC primitives. */
 
 typedef int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		__atomic_fetch_add (&(AI), (V), __ATOMIC_ACQ_REL)
+#define hb_atomic_int_impl_add(AI, V)		__atomic_fetch_add ((AI), (V), __ATOMIC_ACQ_REL)
+#define hb_atomic_int_impl_set_relaxed(AI, V)	__atomic_store_n ((AI), (V), __ATOMIC_RELAXED)
+#define hb_atomic_int_impl_get_relaxed(AI)	__atomic_load_n ((AI), __ATOMIC_RELAXED)
 
 #define hb_atomic_ptr_impl_get(P)		__atomic_load_n ((P), __ATOMIC_CONSUME)
 static inline bool
@@ -69,7 +71,9 @@
 #include <atomic>
 
 typedef int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		(reinterpret_cast<std::atomic<int> *> (&AI)->fetch_add ((V), std::memory_order_acq_rel))
+#define hb_atomic_int_impl_add(AI, V)		(reinterpret_cast<std::atomic<int> *> (AI)->fetch_add ((V), std::memory_order_acq_rel))
+#define hb_atomic_int_impl_set_relaxed(AI, V)	(reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_relaxed))
+#define hb_atomic_int_impl_get_relaxed(AI)	(reinterpret_cast<std::atomic<int> *> (AI)->load (std::memory_order_relaxed))
 
 #define hb_atomic_ptr_impl_get(P)		(reinterpret_cast<std::atomic<void*> *> (P)->load (std::memory_order_consume))
 static inline bool
@@ -98,7 +102,7 @@
 }
 
 typedef LONG hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		InterlockedExchangeAdd (&(AI), (V))
+#define hb_atomic_int_impl_add(AI, V)		InterlockedExchangeAdd ((AI), (V))
 
 #define hb_atomic_ptr_impl_get(P)		(_HBMemoryBarrier (), (void *) *(P))
 #define hb_atomic_ptr_impl_cmpexch(P,O,N)	(InterlockedCompareExchangePointer ((void **) (P), (void *) (N), (void *) (O)) == (void *) (O))
@@ -107,7 +111,7 @@
 #elif !defined(HB_NO_MT) && defined(HAVE_INTEL_ATOMIC_PRIMITIVES)
 
 typedef int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		__sync_fetch_and_add (&(AI), (V))
+#define hb_atomic_int_impl_add(AI, V)		__sync_fetch_and_add ((AI), (V))
 
 #define hb_atomic_ptr_impl_get(P)		(void *) (__sync_synchronize (), *(P))
 #define hb_atomic_ptr_impl_cmpexch(P,O,N)	__sync_bool_compare_and_swap ((P), (O), (N))
@@ -119,7 +123,7 @@
 #include <mbarrier.h>
 
 typedef unsigned int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		( ({__machine_rw_barrier ();}), atomic_add_int_nv (&(AI), (V)) - (V))
+#define hb_atomic_int_impl_add(AI, V)		( ({__machine_rw_barrier ();}), atomic_add_int_nv ((AI), (V)) - (V))
 
 #define hb_atomic_ptr_impl_get(P)		( ({__machine_rw_barrier ();}), (void *) *(P))
 #define hb_atomic_ptr_impl_cmpexch(P,O,N)	( ({__machine_rw_barrier ();}), atomic_cas_ptr ((void **) (P), (void *) (O), (void *) (N)) == (void *) (O) ? true : false)
@@ -136,7 +140,7 @@
 
 
 typedef int32_t hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		(OSAtomicAdd32Barrier ((V), &(AI)) - (V))
+#define hb_atomic_int_impl_add(AI, V)		(OSAtomicAdd32Barrier ((V), (AI)) - (V))
 
 #define hb_atomic_ptr_impl_get(P)		(OSMemoryBarrier (), (void *) *(P))
 #if (MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4 || __IPHONE_VERSION_MIN_REQUIRED >= 20100)
@@ -169,7 +173,7 @@
 }
 
 typedef int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)           _hb_fetch_and_add (&(AI), (V))
+#define hb_atomic_int_impl_add(AI, V)           _hb_fetch_and_add ((AI), (V))
 
 #define hb_atomic_ptr_impl_get(P)               (__sync(), (void *) *(P))
 #define hb_atomic_ptr_impl_cmpexch(P,O,N)       _hb_compare_and_swaplp ((long*)(P), (long)(O), (long)(N))
@@ -179,7 +183,7 @@
 #define HB_ATOMIC_INT_NIL 1 /* Warn that fallback implementation is in use. */
 
 typedef volatile int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		(((AI) += (V)) - (V))
+#define hb_atomic_int_impl_add(AI, V)		((*(AI) += (V)) - (V))
 
 #define hb_atomic_ptr_impl_get(P)		((void *) *(P))
 #define hb_atomic_ptr_impl_cmpexch(P,O,N)	(* (void * volatile *) (P) == (void *) (O) ? (* (void * volatile *) (P) = (void *) (N), true) : false)
@@ -188,7 +192,7 @@
 #else /* HB_NO_MT */
 
 typedef int hb_atomic_int_impl_t;
-#define hb_atomic_int_impl_add(AI, V)		(((AI) += (V)) - (V))
+#define hb_atomic_int_impl_add(AI, V)		((*(AI) += (V)) - (V))
 
 #define hb_atomic_ptr_impl_get(P)		((void *) *(P))
 #define hb_atomic_ptr_impl_cmpexch(P,O,N)	(* (void **) (P) == (void *) (O) ? (* (void **) (P) = (void *) (N), true) : false)
@@ -200,15 +204,22 @@
 #ifndef HB_ATOMIC_INT_INIT
 #define HB_ATOMIC_INT_INIT(V)          {V}
 #endif
+#ifndef hb_atomic_int_impl_set_relaxed
+#define hb_atomic_int_impl_set_relaxed(AI, V)	(*(AI) = (V))
+#endif
+#ifndef hb_atomic_int_impl_get_relaxed
+#define hb_atomic_int_impl_get_relaxed(AI)	(*(AI))
+#endif
+
 
 struct hb_atomic_int_t
 {
   mutable hb_atomic_int_impl_t v;
 
-  inline void set_unsafe (int v_) { v = v_; }
-  inline int get_unsafe (void) const { return v; }
-  inline int inc (void) { return hb_atomic_int_impl_add (v,  1); }
-  inline int dec (void) { return hb_atomic_int_impl_add (v, -1); }
+  inline void set_relaxed (int v_) { hb_atomic_int_impl_set_relaxed (&v, v_); }
+  inline int get_relaxed (void) const { return hb_atomic_int_impl_get_relaxed (&v); }
+  inline int inc (void) { return hb_atomic_int_impl_add (&v,  1); }
+  inline int dec (void) { return hb_atomic_int_impl_add (&v, -1); }
 };
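
A brief usage sketch of the new accessors (illustrative; not part of the diff):

    hb_atomic_int_t ref_count = HB_ATOMIC_INT_INIT (1);

    ref_count.set_relaxed (2);           /* race-free store, no ordering */
    int n = ref_count.get_relaxed ();    /* race-free load, no ordering */
    ref_count.inc ();                    /* fetch-and-add; ordering per backend
                                            (acq_rel on the C++11 paths) */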