Update aosp/master compiler-rt for rebase to r230699.

Change-Id: I6c415fd5f6420e3012d9da76719111721e906dfa
diff --git a/test/tsan/CMakeLists.txt b/test/tsan/CMakeLists.txt
index 29c0821..5a9542f 100644
--- a/test/tsan/CMakeLists.txt
+++ b/test/tsan/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(TSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+list(APPEND TSAN_TEST_DEPS GotsanRuntimeCheck)
 if(NOT COMPILER_RT_STANDALONE_BUILD)
   list(APPEND TSAN_TEST_DEPS tsan)
 endif()
diff --git a/test/tsan/aligned_vs_unaligned_race.cc b/test/tsan/aligned_vs_unaligned_race.cc
index f82542e..5c1189f 100644
--- a/test/tsan/aligned_vs_unaligned_race.cc
+++ b/test/tsan/aligned_vs_unaligned_race.cc
@@ -1,34 +1,35 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 // Race between an aligned access and an unaligned access, which
 // touches the same memory region.
-// This is a real race which is not detected by tsan.
-// https://code.google.com/p/thread-sanitizer/issues/detail?id=17
-#include <pthread.h>
-#include <stdio.h>
+#include "test.h"
 #include <stdint.h>
 
 uint64_t Global[2];
 
 void *Thread1(void *x) {
   Global[1]++;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
+  barrier_wait(&barrier);
   char *p1 = reinterpret_cast<char *>(&Global[0]);
-  uint64_t *p4 = reinterpret_cast<uint64_t *>(p1 + 1);
-  (*p4)++;
+  struct __attribute__((packed, aligned(1))) u_uint64_t { uint64_t val; };
+  u_uint64_t *p4 = reinterpret_cast<u_uint64_t *>(p1 + 1);
+  (*p4).val++;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
   printf("Pass\n");
-  // CHECK-NOT: ThreadSanitizer: data race
+  // CHECK: ThreadSanitizer: data race
   // CHECK: Pass
   return 0;
 }
diff --git a/test/tsan/annotate_happens_before.cc b/test/tsan/annotate_happens_before.cc
new file mode 100644
index 0000000..0116616
--- /dev/null
+++ b/test/tsan/annotate_happens_before.cc
@@ -0,0 +1,57 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+/*
+Annotations usage example.
+
+Tsan does not see synchronization in barrier_wait.
+ANNOTATE_HAPPENS_BEFORE/AFTER communicate the synchronization to tsan
+and prevent the race report.
+
+If the compiler does not support the __has_feature macro, then you can build
+with CFLAGS="-fsanitize=thread -DTHREAD_SANITIZER" and then use
+#ifdef THREAD_SANITIZER to enable annotations.
+*/
+
+#if defined(__has_feature) && __has_feature(thread_sanitizer)
+# define ANNOTATE_HAPPENS_BEFORE(addr) \
+    AnnotateHappensBefore(__FILE__, __LINE__, (void*)(addr))
+# define ANNOTATE_HAPPENS_AFTER(addr) \
+    AnnotateHappensAfter(__FILE__, __LINE__, (void*)(addr))
+extern "C" void AnnotateHappensBefore(const char *f, int l, void *addr);
+extern "C" void AnnotateHappensAfter(const char *f, int l, void *addr);
+#else
+# define ANNOTATE_HAPPENS_BEFORE(addr)
+# define ANNOTATE_HAPPENS_AFTER(addr)
+#endif
+
+int Global;
+
+void *Thread1(void *x) {
+  barrier_wait(&barrier);
+  ANNOTATE_HAPPENS_AFTER(&barrier);
+  Global++;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  Global--;
+  ANNOTATE_HAPPENS_BEFORE(&barrier);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
+
diff --git a/test/tsan/atomic_free.cc b/test/tsan/atomic_free.cc
index 1dcf887..a0d8e42 100644
--- a/test/tsan/atomic_free.cc
+++ b/test/tsan/atomic_free.cc
@@ -1,17 +1,18 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *a) {
   __atomic_fetch_add((int*)a, 1, __ATOMIC_SEQ_CST);
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int *a = new int(0);
   pthread_t t;
   pthread_create(&t, 0, Thread, a);
-  sleep(1);
+  barrier_wait(&barrier);
   delete a;
   pthread_join(t, 0);
 }
diff --git a/test/tsan/atomic_free2.cc b/test/tsan/atomic_free2.cc
index c50be6b..4a9f268 100644
--- a/test/tsan/atomic_free2.cc
+++ b/test/tsan/atomic_free2.cc
@@ -1,18 +1,19 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   __atomic_fetch_add((int*)a, 1, __ATOMIC_SEQ_CST);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int *a = new int(0);
   pthread_t t;
   pthread_create(&t, 0, Thread, a);
   delete a;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/atomic_norace.cc b/test/tsan/atomic_norace.cc
index d9ccda5..625109b 100644
--- a/test/tsan/atomic_norace.cc
+++ b/test/tsan/atomic_norace.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 const int kTestCount = 4;
 typedef long long T;
@@ -36,7 +34,8 @@
   for (int i = 0; i < kTestCount; i++) {
     Test(i, &atomics[i], false);
   }
-  sleep(2);
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
   for (int i = 0; i < kTestCount; i++) {
     fprintf(stderr, "Test %d reverse\n", i);
     Test(i, &atomics[kTestCount + i], false);
@@ -45,9 +44,10 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   for (int i = 0; i < kTestCount; i++) {
     fprintf(stderr, "Test %d\n", i);
     Test(i, &atomics[i], true);
@@ -55,6 +55,7 @@
   for (int i = 0; i < kTestCount; i++) {
     Test(i, &atomics[kTestCount + i], true);
   }
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/atomic_race.cc b/test/tsan/atomic_race.cc
index ca444b4..5a0317c 100644
--- a/test/tsan/atomic_race.cc
+++ b/test/tsan/atomic_race.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
-#include <stdio.h>
+#include "test.h"
 
 const int kTestCount = 4;
 typedef long long T;
@@ -36,7 +34,8 @@
   for (int i = 0; i < kTestCount; i++) {
     Test(i, &atomics[i], false);
   }
-  sleep(2);
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
   for (int i = 0; i < kTestCount; i++) {
     fprintf(stderr, "Test %d reverse\n", i);
     Test(i, &atomics[kTestCount + i], false);
@@ -45,9 +44,10 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   for (int i = 0; i < kTestCount; i++) {
     fprintf(stderr, "Test %d\n", i);
     Test(i, &atomics[i], true);
@@ -55,6 +55,7 @@
   for (int i = 0; i < kTestCount; i++) {
     Test(i, &atomics[kTestCount + i], true);
   }
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/atomic_stack.cc b/test/tsan/atomic_stack.cc
index 7e3176f..979eaea 100644
--- a/test/tsan/atomic_stack.cc
+++ b/test/tsan/atomic_stack.cc
@@ -1,21 +1,22 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   __atomic_fetch_add(&Global, 1, __ATOMIC_RELAXED);
   return NULL;
 }
 
 void *Thread2(void *x) {
   Global++;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/benign_race.cc b/test/tsan/benign_race.cc
index b6cba19..2f72fe1 100644
--- a/test/tsan/benign_race.cc
+++ b/test/tsan/benign_race.cc
@@ -1,7 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 int WTFGlobal;
@@ -18,10 +16,12 @@
 void *Thread(void *x) {
   Global = 42;
   WTFGlobal = 142;
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   AnnotateBenignRaceSized(__FILE__, __LINE__,
                           &Global, sizeof(Global), "Race on Global");
   WTFAnnotateBenignRaceSized(__FILE__, __LINE__,
@@ -29,7 +29,7 @@
                              "Race on WTFGlobal");
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   Global = 43;
   WTFGlobal = 143;
   pthread_join(t, 0);
diff --git a/test/tsan/blacklist2.cc b/test/tsan/blacklist2.cc
index 1092561..629b588 100644
--- a/test/tsan/blacklist2.cc
+++ b/test/tsan/blacklist2.cc
@@ -5,14 +5,12 @@
 
 // RUN: %clangxx_tsan -O1 %s -fsanitize-blacklist=%t.blacklist -o %t
 // RUN: %deflake %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   // CHECK: ThreadSanitizer: data race
   // CHECK: Write of size 4
   // CHECK: #0 Thread1{{.*}}blacklist2.cc:[[@LINE+1]]
@@ -35,10 +33,12 @@
   Global--;
   // CHECK: #2 Blacklisted_Thread2{{.*}}blacklist2.cc:[[@LINE+1]]
   CallTouchGlobal();
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Blacklisted_Thread2, NULL);
diff --git a/test/tsan/cond_cancel.c b/test/tsan/cond_cancel.c
index 397cad4..e744570 100644
--- a/test/tsan/cond_cancel.c
+++ b/test/tsan/cond_cancel.c
@@ -2,10 +2,7 @@
 // CHECK-NOT: WARNING
 // CHECK: OK
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 pthread_mutex_t m;
 pthread_cond_t c;
@@ -14,6 +11,7 @@
 void *thr1(void *p) {
   pthread_mutex_lock(&m);
   pthread_cleanup_push((void(*)(void *arg))pthread_mutex_unlock, &m);
+  barrier_wait(&barrier);
   while (x == 0)
     pthread_cond_wait(&c, &m);
   pthread_cleanup_pop(1);
@@ -21,12 +19,15 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
+
   pthread_t th;
 
   pthread_mutex_init(&m, 0);
   pthread_cond_init(&c, 0);
 
   pthread_create(&th, 0, thr1, 0);
+  barrier_wait(&barrier);
   sleep(1);  // let it block on cond var
   pthread_cancel(th);
 
diff --git a/test/tsan/cond_race.cc b/test/tsan/cond_race.cc
index fa42faf..52654f1 100644
--- a/test/tsan/cond_race.cc
+++ b/test/tsan/cond_race.cc
@@ -3,10 +3,7 @@
 // CHECK: ThreadSanitizer: data race
 // CHECK: pthread_cond_signal
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 struct Ctx {
   pthread_mutex_t m;
@@ -20,10 +17,12 @@
   c->done = true;
   pthread_mutex_unlock(&c->m);
   pthread_cond_signal(&c->c);
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   Ctx *c = new Ctx();
   pthread_mutex_init(&c->m, 0);
   pthread_cond_init(&c->c, 0);
@@ -33,8 +32,8 @@
   while (!c->done)
     pthread_cond_wait(&c->c, &c->m);
   pthread_mutex_unlock(&c->m);
-  // w/o this sleep, it can be reported as use-after-free
-  sleep(1);
+  // w/o this barrier, it can be reported as use-after-free
+  barrier_wait(&barrier);
   delete c;
   pthread_join(th, 0);
 }
diff --git a/test/tsan/deadlock_detector_stress_test.cc b/test/tsan/deadlock_detector_stress_test.cc
index 5362478..e02a912 100644
--- a/test/tsan/deadlock_detector_stress_test.cc
+++ b/test/tsan/deadlock_detector_stress_test.cc
@@ -7,12 +7,9 @@
 // RUN: TSAN_OPTIONS=detect_deadlocks=1 %deflake %run %t | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RD
 // RUN: %clangxx_tsan %s -o %t -DLockType=PthreadRecursiveMutex
 // RUN: TSAN_OPTIONS=detect_deadlocks=1 %deflake %run %t | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-REC
-#include <pthread.h>
+#include "test.h"
 #undef NDEBUG
 #include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
 #include <new>
 
 #ifndef LockType
@@ -224,7 +221,7 @@
     fprintf(stderr, "Starting Test5\n");
     // CHECK: Starting Test5
     Init(5);
-    RunThreads(&LockTest::Lock_0_1, &LockTest::Lock_1_0);
+    RunThreads(&LockTest::Lock_0_1<true>, &LockTest::Lock_1_0<true>);
     // CHECK: WARNING: ThreadSanitizer: lock-order-inversion
     // CHECK: Cycle in lock order graph: [[M1:M[0-9]+]] ({{.*}}) => [[M2:M[0-9]+]] ({{.*}}) => [[M1]]
     // CHECK: Mutex [[M2]] acquired here while holding mutex [[M1]] in thread [[T1:T[0-9]+]]
@@ -503,8 +500,21 @@
 
  private:
   void Lock2(size_t l1, size_t l2) { L(l1); L(l2); U(l2); U(l1); }
-  void Lock_0_1() { Lock2(0, 1); }
-  void Lock_1_0() { sleep(1); Lock2(1, 0); }
+
+  template<bool wait = false>
+  void Lock_0_1() {
+    Lock2(0, 1);
+    if (wait)
+      barrier_wait(&barrier);
+  }
+
+  template<bool wait = false>
+  void Lock_1_0() {
+    if (wait)
+      barrier_wait(&barrier);
+    Lock2(1, 0);
+  }
+
   void Lock1_Loop(size_t i, size_t n_iter) {
     for (size_t it = 0; it < n_iter; it++) {
       // if ((it & (it - 1)) == 0) fprintf(stderr, "%zd", i);
@@ -569,6 +579,7 @@
 };
 
 int main(int argc, char **argv) {
+  barrier_init(&barrier, 2);
   if (argc > 1)
     test_number = atoi(argv[1]);
   if (argc > 2)
diff --git a/test/tsan/deep_stack1.cc b/test/tsan/deep_stack1.cc
index 1d00a0e..39185ef 100644
--- a/test/tsan/deep_stack1.cc
+++ b/test/tsan/deep_stack1.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t -DORDER1 && %deflake %run %t | FileCheck %s
 // RUN: %clangxx_tsan -O1 %s -o %t -DORDER2 && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 volatile int X;
 volatile int N;
@@ -17,13 +15,17 @@
 
 void *Thread(void *p) {
 #ifdef ORDER1
-  sleep(1);
+  barrier_wait(&barrier);
 #endif
   F();
+#ifdef ORDER2
+  barrier_wait(&barrier);
+#endif
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   N = 50000;
   F = foo;
   pthread_t t;
@@ -32,9 +34,13 @@
   pthread_attr_setstacksize(&a, N * 256 + (1 << 20));
   pthread_create(&t, &a, Thread, 0);
 #ifdef ORDER2
-  sleep(1);
+  barrier_wait(&barrier);
 #endif
   X = 43;
+#ifdef ORDER1
+  barrier_wait(&barrier);
+#endif
+
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/fd_close_norace.cc b/test/tsan/fd_close_norace.cc
index 7238d64..1b52c20 100644
--- a/test/tsan/fd_close_norace.cc
+++ b/test/tsan/fd_close_norace.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -9,17 +7,19 @@
 void *Thread1(void *x) {
   int f = open("/dev/random", O_RDONLY);
   close(f);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   int f = open("/dev/random", O_RDONLY);
   close(f);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/fd_location.cc b/test/tsan/fd_location.cc
index 535329e..1861c89 100644
--- a/test/tsan/fd_location.cc
+++ b/test/tsan/fd_location.cc
@@ -1,23 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int fds[2];
 
 void *Thread1(void *x) {
   write(fds[1], "a", 1);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   close(fds[0]);
   close(fds[1]);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pipe(fds);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
diff --git a/test/tsan/fd_pipe_race.cc b/test/tsan/fd_pipe_race.cc
index 88c4ed4..b94893b 100644
--- a/test/tsan/fd_pipe_race.cc
+++ b/test/tsan/fd_pipe_race.cc
@@ -1,23 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int fds[2];
 
 void *Thread1(void *x) {
   write(fds[1], "a", 1);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   close(fds[0]);
   close(fds[1]);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pipe(fds);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
diff --git a/test/tsan/fd_stdout_race.cc b/test/tsan/fd_stdout_race.cc
index d6a2c7c..fcf8c21 100644
--- a/test/tsan/fd_stdout_race.cc
+++ b/test/tsan/fd_stdout_race.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -9,7 +7,7 @@
 int X;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   int f = open("/dev/random", O_RDONLY);
   char buf;
   read(f, &buf, 1);
@@ -21,10 +19,12 @@
 void *Thread2(void *x) {
   X = 43;
   write(STDOUT_FILENO, "a", 1);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/fork_deadlock.cc b/test/tsan/fork_deadlock.cc
index cc5b122..9418800 100644
--- a/test/tsan/fork_deadlock.cc
+++ b/test/tsan/fork_deadlock.cc
@@ -1,9 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && TSAN_OPTIONS="atexit_sleep_ms=50" %run %t 2>&1 | FileCheck %s
-#include <stdlib.h>
-#include <stdio.h>
+#include "test.h"
 #include <errno.h>
-#include <pthread.h>
-#include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
@@ -16,13 +13,14 @@
 }
 
 static void *watchdog(void *p) {
-  sleep(100);
+  sleep(100);  // is not intended to exit
   fprintf(stderr, "timed out after 100 seconds\n");
   exit(1);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t th1, th2;
   pthread_create(&th1, 0, incrementer, 0);
   pthread_create(&th2, 0, watchdog, 0);
diff --git a/test/tsan/fork_multithreaded.cc b/test/tsan/fork_multithreaded.cc
index 5176a14..3ddb417 100644
--- a/test/tsan/fork_multithreaded.cc
+++ b/test/tsan/fork_multithreaded.cc
@@ -1,19 +1,21 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-DIE
 // RUN: %clangxx_tsan -O1 %s -o %t && TSAN_OPTIONS="die_after_fork=0" %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-NODIE
-#include <stdlib.h>
-#include <stdio.h>
+#include "test.h"
 #include <errno.h>
-#include <pthread.h>
-#include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
 static void *sleeper(void *p) {
-  sleep(10);
+  sleep(10);  // not intended to exit during test
+  return 0;
+}
+
+static void *nop(void *p) {
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t th;
   pthread_create(&th, 0, sleeper, 0);
   switch (fork()) {
@@ -23,7 +25,7 @@
   case 0:  // child
     {
       pthread_t th2;
-      pthread_create(&th2, 0, sleeper, 0);
+      pthread_create(&th2, 0, nop, 0);
       exit(0);
       break;
     }
diff --git a/test/tsan/free_race.c b/test/tsan/free_race.c
index 663d7bc..63cee8c 100644
--- a/test/tsan/free_race.c
+++ b/test/tsan/free_race.c
@@ -1,12 +1,8 @@
 // RUN: %clang_tsan -O1 %s -o %t
 // RUN: %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOZUPP
-// RUN: TSAN_OPTIONS="suppressions=%s.supp print_suppressions=1" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-SUPP
+// RUN: TSAN_OPTIONS="suppressions='%s.supp' print_suppressions=1" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-SUPP
 
-#include <pthread.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 int *mem;
 pthread_mutex_t mtx;
@@ -15,11 +11,12 @@
   pthread_mutex_lock(&mtx);
   free(mem);
   pthread_mutex_unlock(&mtx);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(&mtx);
   mem[0] = 42;
   pthread_mutex_unlock(&mtx);
@@ -27,6 +24,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   mem = (int*)malloc(100);
   pthread_mutex_init(&mtx, 0);
   pthread_t t;
diff --git a/test/tsan/global_race.cc b/test/tsan/global_race.cc
index e12bb1d..3128ec4 100644
--- a/test/tsan/global_race.cc
+++ b/test/tsan/global_race.cc
@@ -1,24 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %T/global_race.cc.exe && %deflake %run %T/global_race.cc.exe | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 int GlobalData[10];
 
 void *Thread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   GlobalData[2] = 42;
   return 0;
 }
 
 int main() {
-  // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not
-  // match to the format used in the diagnotic message.
-  fprintf(stderr, "addr=0x%012lx\n", (unsigned long) GlobalData);
+  barrier_init(&barrier, 2);
+  fprintf(stderr, "addr=");
+  print_address(GlobalData);
+  fprintf(stderr, "\n");
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   GlobalData[2] = 43;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/global_race2.cc b/test/tsan/global_race2.cc
index ac994cc..4ab2842 100644
--- a/test/tsan/global_race2.cc
+++ b/test/tsan/global_race2.cc
@@ -1,24 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 int x;
 
 void *Thread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   x = 1;
   return 0;
 }
 
 int main() {
-  // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not
-  // match to the format used in the diagnotic message.
-  fprintf(stderr, "addr2=0x%012lx\n", (unsigned long) &x);
+  barrier_init(&barrier, 2);
+  fprintf(stderr, "addr2=");
+  print_address(&x);
+  fprintf(stderr, "\n");
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   x = 0;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/global_race3.cc b/test/tsan/global_race3.cc
index a3222bb..1531d78 100644
--- a/test/tsan/global_race3.cc
+++ b/test/tsan/global_race3.cc
@@ -1,8 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 namespace XXX {
   struct YYY {
@@ -12,18 +9,20 @@
 }
 
 void *Thread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   XXX::YYY::ZZZ[0] = 1;
   return 0;
 }
 
 int main() {
-  // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not
-  // match to the format used in the diagnotic message.
-  fprintf(stderr, "addr3=0x%012lx\n", (unsigned long) XXX::YYY::ZZZ);
+  barrier_init(&barrier, 2);
+  fprintf(stderr, "addr3=");
+  print_address(XXX::YYY::ZZZ);
+  fprintf(stderr, "\n");
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   XXX::YYY::ZZZ[0] = 0;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/halt_on_error.cc b/test/tsan/halt_on_error.cc
index 3c55c60..e55454b 100644
--- a/test/tsan/halt_on_error.cc
+++ b/test/tsan/halt_on_error.cc
@@ -1,21 +1,21 @@
 // RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS halt_on_error=1" %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int X;
 
 void *Thread(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   X = 42;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   fprintf(stderr, "BEFORE\n");
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   X = 43;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   fprintf(stderr, "AFTER\n");
   return 0;
diff --git a/test/tsan/ignore_free.cc b/test/tsan/ignore_free.cc
index 1df6dce..bb6c6ee 100644
--- a/test/tsan/ignore_free.cc
+++ b/test/tsan/ignore_free.cc
@@ -1,8 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 extern "C" {
 void AnnotateIgnoreReadsBegin(const char *f, int l);
@@ -13,14 +10,16 @@
 
 void *Thread(void *p) {
   *(int*)p = 42;
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int *p = new int(0);
   pthread_t t;
   pthread_create(&t, 0, Thread, p);
-  sleep(1);
+  barrier_wait(&barrier);
   AnnotateIgnoreReadsBegin(__FILE__, __LINE__);
   AnnotateIgnoreWritesBegin(__FILE__, __LINE__);
   free(p);
diff --git a/test/tsan/ignore_lib0.cc b/test/tsan/ignore_lib0.cc
index fe1a355..63c9340 100644
--- a/test/tsan/ignore_lib0.cc
+++ b/test/tsan/ignore_lib0.cc
@@ -3,7 +3,7 @@
 // RUN: echo running w/o suppressions:
 // RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
 // RUN: echo running with suppressions:
-// RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
+// RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
 
 // Tests that interceptors coming from a library specified in called_from_lib
 // suppression are ignored.
diff --git a/test/tsan/ignore_lib1.cc b/test/tsan/ignore_lib1.cc
index 30a9994..ef1f973 100644
--- a/test/tsan/ignore_lib1.cc
+++ b/test/tsan/ignore_lib1.cc
@@ -3,7 +3,7 @@
 // RUN: echo running w/o suppressions:
 // RUN: %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
 // RUN: echo running with suppressions:
-// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
+// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
 
 // Tests that interceptors coming from a dynamically loaded library specified
 // in called_from_lib suppression are ignored.
diff --git a/test/tsan/ignore_lib2.cc b/test/tsan/ignore_lib2.cc
index 23a0872..ad3107c 100644
--- a/test/tsan/ignore_lib2.cc
+++ b/test/tsan/ignore_lib2.cc
@@ -1,7 +1,7 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib2_0.so
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib2_1.so
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %deflake %run %t | FileCheck %s
+// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %deflake %run %t | FileCheck %s
 
 // Tests that called_from_lib suppression matched against 2 libraries
 // causes program crash (this is not supported).
diff --git a/test/tsan/ignore_lib3.cc b/test/tsan/ignore_lib3.cc
index 137109e..96bf313 100644
--- a/test/tsan/ignore_lib3.cc
+++ b/test/tsan/ignore_lib3.cc
@@ -1,6 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib3.so
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %deflake %run %t | FileCheck %s
+// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %deflake %run %t | FileCheck %s
 
 // Tests that unloading of a library matched against called_from_lib suppression
 // causes program crash (this is not supported).
diff --git a/test/tsan/ignore_malloc.cc b/test/tsan/ignore_malloc.cc
index 0f1fb5e..1f633f0 100644
--- a/test/tsan/ignore_malloc.cc
+++ b/test/tsan/ignore_malloc.cc
@@ -1,8 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 extern "C" {
 void AnnotateIgnoreReadsBegin(const char *f, int l);
@@ -16,7 +13,7 @@
 void *Thread(void *a) {
   int *p = 0;
   while ((p = __atomic_load_n(&g, __ATOMIC_RELAXED)) == 0)
-    usleep(100);
+    usleep(100);  // spin-wait
   *p = 42;
   return 0;
 }
diff --git a/test/tsan/ignore_race.cc b/test/tsan/ignore_race.cc
index c6e067f..cc33b66 100644
--- a/test/tsan/ignore_race.cc
+++ b/test/tsan/ignore_race.cc
@@ -1,7 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
@@ -16,13 +14,15 @@
   Global = 42;
   AnnotateIgnoreReadsEnd(__FILE__, __LINE__);
   AnnotateIgnoreWritesEnd(__FILE__, __LINE__);
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   Global = 43;
   pthread_join(t, 0);
   printf("OK\n");
diff --git a/test/tsan/inlined_memcpy_race.cc b/test/tsan/inlined_memcpy_race.cc
index a95576a..e3ed07a 100644
--- a/test/tsan/inlined_memcpy_race.cc
+++ b/test/tsan/inlined_memcpy_race.cc
@@ -1,24 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <stdio.h>
+#include "test.h"
 #include <string.h>
-#include <unistd.h>
 
 int x[4], z[4];
 
 void *MemCpyThread(void *a) {
   memcpy((int*)a, z, 16);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *MemSetThread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   memset((int*)a, 0, 16);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   // Race on x between memcpy and memset
   pthread_create(&t[0], NULL, MemCpyThread, x);
diff --git a/test/tsan/inlined_memcpy_race2.cc b/test/tsan/inlined_memcpy_race2.cc
index 63b560f..37414ba 100644
--- a/test/tsan/inlined_memcpy_race2.cc
+++ b/test/tsan/inlined_memcpy_race2.cc
@@ -1,24 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <stdio.h>
+#include "test.h"
 #include <string.h>
-#include <unistd.h>
 
 int y[4], z[4];
 
 void *MemMoveThread(void *a) {
   memmove((int*)a, z, 16);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *MemSetThread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   memset((int*)a, 0, 16);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   // Race on y between memmove and memset
   pthread_create(&t[0], NULL, MemMoveThread, y);
diff --git a/test/tsan/java.h b/test/tsan/java.h
index d986d08..35fdbc1 100644
--- a/test/tsan/java.h
+++ b/test/tsan/java.h
@@ -1,7 +1,4 @@
-#include <pthread.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 extern "C" {
 typedef unsigned long jptr;  // NOLINT
@@ -18,4 +15,7 @@
 void __tsan_java_mutex_read_unlock(jptr addr);
 void __tsan_java_mutex_lock_rec(jptr addr, int rec);
 int  __tsan_java_mutex_unlock_rec(jptr addr);
+int  __tsan_java_acquire(jptr addr);
+int  __tsan_java_release(jptr addr);
+int  __tsan_java_release_store(jptr addr);
 }
diff --git a/test/tsan/java_finalizer.cc b/test/tsan/java_finalizer.cc
index d5c6a22..acbbf08 100644
--- a/test/tsan/java_finalizer.cc
+++ b/test/tsan/java_finalizer.cc
@@ -2,13 +2,14 @@
 #include "java.h"
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_finalize();
   *(int*)p = 42;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -17,6 +18,7 @@
   pthread_t th;
   pthread_create(&th, 0, Thread, (void*)jheap);
   *(int*)jheap = 43;
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
   fprintf(stderr, "DONE\n");
diff --git a/test/tsan/java_lock.cc b/test/tsan/java_lock.cc
index 36a0f8b..f172052 100644
--- a/test/tsan/java_lock.cc
+++ b/test/tsan/java_lock.cc
@@ -1,12 +1,11 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 #include "java.h"
-#include <unistd.h>
 
 jptr varaddr;
 jptr lockaddr;
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock(lockaddr);
   *(int*)varaddr = 42;
   __tsan_java_mutex_unlock(lockaddr);
@@ -14,6 +13,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -26,6 +26,7 @@
   __tsan_java_mutex_lock(lockaddr);
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
   fprintf(stderr, "DONE\n");
diff --git a/test/tsan/java_lock_move.cc b/test/tsan/java_lock_move.cc
index 19c3e35..fe5491d 100644
--- a/test/tsan/java_lock_move.cc
+++ b/test/tsan/java_lock_move.cc
@@ -7,7 +7,7 @@
 jptr lockaddr2;
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock(lockaddr2);
   *(int*)varaddr2 = 42;
   __tsan_java_mutex_unlock(lockaddr2);
@@ -15,6 +15,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -31,6 +32,7 @@
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
   __tsan_java_move(varaddr, varaddr2, kBlockSize);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr2, kBlockSize);
   printf("DONE\n");
diff --git a/test/tsan/java_lock_rec.cc b/test/tsan/java_lock_rec.cc
index 2b0ab0e..f0bf401 100644
--- a/test/tsan/java_lock_rec.cc
+++ b/test/tsan/java_lock_rec.cc
@@ -1,6 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 #include "java.h"
-#include <unistd.h>
 
 jptr varaddr;
 jptr lockaddr;
@@ -14,7 +13,8 @@
     printf("FAILED 0 rec=%d\n", rec);
     exit(1);
   }
-  sleep(2);
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock_rec(lockaddr, rec);
   if (*(int*)varaddr != 43) {
     printf("FAILED 3 var=%d\n", *(int*)varaddr);
@@ -26,6 +26,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -36,7 +37,7 @@
   lockaddr = jheap + 8;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock(lockaddr);
   if (*(int*)varaddr != 42) {
     printf("FAILED 1 var=%d\n", *(int*)varaddr);
@@ -44,6 +45,7 @@
   }
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
   printf("DONE\n");
diff --git a/test/tsan/java_lock_rec_race.cc b/test/tsan/java_lock_rec_race.cc
index 841aa39..3da8ad0 100644
--- a/test/tsan/java_lock_rec_race.cc
+++ b/test/tsan/java_lock_rec_race.cc
@@ -1,6 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include "java.h"
-#include <unistd.h>
 
 jptr varaddr;
 jptr lockaddr;
@@ -15,7 +14,8 @@
     exit(1);
   }
   *(int*)varaddr = 42;
-  sleep(2);
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock_rec(lockaddr, rec);
   __tsan_java_mutex_unlock(lockaddr);
   __tsan_java_mutex_unlock(lockaddr);
@@ -24,6 +24,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -34,10 +35,11 @@
   lockaddr = jheap + 8;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock(lockaddr);
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
   printf("DONE\n");
diff --git a/test/tsan/java_move_overlap.cc b/test/tsan/java_move_overlap.cc
index 12955b4..7ed98ef 100644
--- a/test/tsan/java_move_overlap.cc
+++ b/test/tsan/java_move_overlap.cc
@@ -13,7 +13,7 @@
 jptr lockaddr2_new;
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_mutex_lock(lockaddr1_new);
   *(char*)varaddr1_new = 43;
   __tsan_java_mutex_unlock(lockaddr1_new);
@@ -24,6 +24,7 @@
 }
 
 int main(int argc, char **argv) {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   void *jheap = malloc(kHeapSize);
   jheap = (char*)jheap + 8;
@@ -62,6 +63,7 @@
   __tsan_java_mutex_unlock(lockaddr2_old);
 
   __tsan_java_move(varaddr1_old, varaddr1_new, kBlockSize);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr1_new, kBlockSize);
   printf("DONE\n");
diff --git a/test/tsan/java_move_overlap_race.cc b/test/tsan/java_move_overlap_race.cc
index 2b3769b..874b90b 100644
--- a/test/tsan/java_move_overlap_race.cc
+++ b/test/tsan/java_move_overlap_race.cc
@@ -9,13 +9,14 @@
 jptr varaddr2_new;
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)varaddr1_new = 43;
   *(int*)varaddr2_new = 43;
   return 0;
 }
 
 int main(int argc, char **argv) {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   void *jheap = malloc(kHeapSize);
   jheap = (char*)jheap + 8;
@@ -42,6 +43,7 @@
   *(int*)varaddr2_old = 43;
 
   __tsan_java_move(varaddr1_old, varaddr1_new, kBlockSize);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr1_new, kBlockSize);
   printf("DONE\n");
diff --git a/test/tsan/java_race_move.cc b/test/tsan/java_race_move.cc
index 8a51be9..6d1b092 100644
--- a/test/tsan/java_race_move.cc
+++ b/test/tsan/java_race_move.cc
@@ -5,12 +5,13 @@
 jptr varaddr2;
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)varaddr2 = 42;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -23,6 +24,7 @@
   pthread_create(&th, 0, Thread, 0);
   *(int*)varaddr = 43;
   __tsan_java_move(varaddr, varaddr2, kBlockSize);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr2, kBlockSize);
   fprintf(stderr, "DONE\n");
diff --git a/test/tsan/java_rwlock.cc b/test/tsan/java_rwlock.cc
index b03afa6..a4cc92a 100644
--- a/test/tsan/java_rwlock.cc
+++ b/test/tsan/java_rwlock.cc
@@ -1,12 +1,11 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 #include "java.h"
-#include <unistd.h>
 
 jptr varaddr;
 jptr lockaddr;
 
 void *Thread(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   __tsan_java_mutex_read_lock(lockaddr);
   *(int*)varaddr = 42;
   __tsan_java_mutex_read_unlock(lockaddr);
@@ -14,6 +13,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int const kHeapSize = 1024 * 1024;
   jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
@@ -26,6 +26,7 @@
   __tsan_java_mutex_lock(lockaddr);
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
   printf("DONE\n");
diff --git a/test/tsan/java_volatile.cc b/test/tsan/java_volatile.cc
new file mode 100644
index 0000000..885b4f2
--- /dev/null
+++ b/test/tsan/java_volatile.cc
@@ -0,0 +1,42 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "java.h"
+
+jptr varaddr;
+jptr lockaddr;
+
+void *Thread(void *p) {
+  while (__atomic_load_n((int*)lockaddr, __ATOMIC_RELAXED) == 0)
+    usleep(1000);  // spin-wait
+  __tsan_java_acquire(lockaddr);
+  *(int*)varaddr = 42;
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  int const kHeapSize = 1024 * 1024;
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
+  const int kBlockSize = 16;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap;
+  lockaddr = jheap + 8;
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+  *(int*)varaddr = 43;
+  __tsan_java_release(lockaddr);
+  __atomic_store_n((int*)lockaddr, 1, __ATOMIC_RELAXED);
+  pthread_join(th, 0);
+  *(int*)lockaddr = 0;
+  pthread_create(&th, 0, Thread, 0);
+  *(int*)varaddr = 43;
+  __tsan_java_release_store(lockaddr);
+  __atomic_store_n((int*)lockaddr, 1, __ATOMIC_RELAXED);
+  pthread_join(th, 0);
+  __tsan_java_free(jheap, kBlockSize);
+  fprintf(stderr, "DONE\n");
+  return __tsan_java_fini();
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/load_shared_lib.cc b/test/tsan/load_shared_lib.cc
index a27dc1c..b7934b8 100644
--- a/test/tsan/load_shared_lib.cc
+++ b/test/tsan/load_shared_lib.cc
@@ -7,35 +7,39 @@
 
 #ifdef BUILD_SO
 
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 int GLOB_SHARED = 0;
 
 extern "C"
+void init_so() {
+  barrier_init(&barrier, 2);
+}
+
+extern "C"
 void *write_from_so(void *unused) {
-  if (unused)
-    sleep(1);
+  if (unused == 0)
+    barrier_wait(&barrier);
   GLOB_SHARED++;
+  if (unused != 0)
+    barrier_wait(&barrier);
   return NULL;
 }
 
 #else  // BUILD_SO
 
+#include "test.h"
 #include <dlfcn.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
-
 #include <string>
 
 int GLOB = 0;
 
 void *write_glob(void *unused) {
-  if (unused)
-    sleep(1);
+  if (unused == 0)
+    barrier_wait(&barrier);
   GLOB++;
+  if (unused != 0)
+    barrier_wait(&barrier);
   return NULL;
 }
 
@@ -48,6 +52,7 @@
 }
 
 int main(int argc, char *argv[]) {
+  barrier_init(&barrier, 2);
   std::string path = std::string(argv[0]) + std::string("-so.so");
   race_two_threads(write_glob);
   // CHECK: write_glob
@@ -56,6 +61,9 @@
     printf("error in dlopen(): %s\n", dlerror());
     return 1;
   }
+  void (*init_so)();
+  *(void **)&init_so = dlsym(lib, "init_so");
+  init_so();
   void *(*write_from_so)(void *unused);
   *(void **)&write_from_so = dlsym(lib, "write_from_so");
   race_two_threads(write_from_so);
diff --git a/test/tsan/malloc_stack.cc b/test/tsan/malloc_stack.cc
index 6027360..ba1d62b 100644
--- a/test/tsan/malloc_stack.cc
+++ b/test/tsan/malloc_stack.cc
@@ -1,20 +1,21 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 _Atomic(int*) p;
 
 void *thr(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   int *pp = __c11_atomic_load(&p, __ATOMIC_RELAXED);
   *pp = 42;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t th;
   pthread_create(&th, 0, thr, p);
   __c11_atomic_store(&p, new int, __ATOMIC_RELAXED);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
 }
 
diff --git a/test/tsan/map32bit.cc b/test/tsan/map32bit.cc
index 3a76fa2..d9a0465 100644
--- a/test/tsan/map32bit.cc
+++ b/test/tsan/map32bit.cc
@@ -1,21 +1,23 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
+#include "test.h"
 #include <stdint.h>
-#include <unistd.h>
 #include <errno.h>
 #include <sys/mman.h>
 
 // Test for issue:
 // https://code.google.com/p/thread-sanitizer/issues/detail?id=5
 
+// MAP_32BIT flag for mmap is supported only for x86_64.
+// XFAIL: mips64
+
 void *Thread(void *ptr) {
   *(int*)ptr = 42;
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   void *ptr = mmap(0, 128 << 10, PROT_READ|PROT_WRITE,
       MAP_32BIT|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   fprintf(stderr, "ptr=%p\n", ptr);
@@ -29,7 +31,7 @@
   }
   pthread_t t;
   pthread_create(&t, 0, Thread, ptr);
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)ptr = 42;
   pthread_join(t, 0);
   munmap(ptr, 128 << 10);
diff --git a/test/tsan/memcpy_race.cc b/test/tsan/memcpy_race.cc
index 8ec8e0a..d495773 100644
--- a/test/tsan/memcpy_race.cc
+++ b/test/tsan/memcpy_race.cc
@@ -1,9 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <stdio.h>
+#include "test.h"
 #include <string.h>
-#include <unistd.h>
 
 char *data = new char[10];
 char *data1 = new char[10];
@@ -12,17 +9,19 @@
 void *Thread1(void *x) {
   static volatile int size = 1;
   memcpy(data+5, data1, size);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
   static volatile int size = 4;
-  sleep(1);
+  barrier_wait(&barrier);
   memcpy(data+3, data2, size);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   fprintf(stderr, "addr=%p\n", &data[5]);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
diff --git a/test/tsan/mmap_large.cc b/test/tsan/mmap_large.cc
index 44233b0..4ae4c08 100644
--- a/test/tsan/mmap_large.cc
+++ b/test/tsan/mmap_large.cc
@@ -1,10 +1,15 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 #include <stdint.h>
 #include <stdio.h>
+#include <errno.h>
 #include <sys/mman.h>
 
 int main() {
-  const size_t kLog2Size = 40;
+#ifdef __x86_64__
+  const size_t kLog2Size = 39;
+#elif defined(__mips64)
+  const size_t kLog2Size = 32;
+#endif
   const uintptr_t kLocation = 0x40ULL << kLog2Size;
   void *p = mmap(
       reinterpret_cast<void*>(kLocation),
@@ -12,7 +17,7 @@
       PROT_READ|PROT_WRITE,
       MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE,
       -1, 0);
-  fprintf(stderr, "DONE %p\n", p);
+  fprintf(stderr, "DONE %p %d\n", p, errno);
   return p == MAP_FAILED;
 }
 
diff --git a/test/tsan/mop_with_offset.cc b/test/tsan/mop_with_offset.cc
index e44c78b..c67e81e 100644
--- a/test/tsan/mop_with_offset.cc
+++ b/test/tsan/mop_with_offset.cc
@@ -1,23 +1,22 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread1(void *x) {
   int *p = (int*)x;
   p[0] = 1;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   char *p = (char*)x;
   p[2] = 1;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int *data = new int(42);
   fprintf(stderr, "ptr1=%p\n", data);
   fprintf(stderr, "ptr2=%p\n", (char*)data + 2);
diff --git a/test/tsan/mop_with_offset2.cc b/test/tsan/mop_with_offset2.cc
index a465d5f..4602673 100644
--- a/test/tsan/mop_with_offset2.cc
+++ b/test/tsan/mop_with_offset2.cc
@@ -1,11 +1,8 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   int *p = (int*)x;
   p[0] = 1;
   return NULL;
@@ -14,10 +11,12 @@
 void *Thread2(void *x) {
   char *p = (char*)x;
   p[2] = 1;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int *data = new int(42);
   fprintf(stderr, "ptr1=%p\n", data);
   fprintf(stderr, "ptr2=%p\n", (char*)data + 2);
diff --git a/test/tsan/mutex_cycle2.c b/test/tsan/mutex_cycle2.c
index 031830d..85d19a0 100644
--- a/test/tsan/mutex_cycle2.c
+++ b/test/tsan/mutex_cycle2.c
@@ -2,10 +2,10 @@
 // RUN:                                 not %run %t 2>&1 | FileCheck %s
 // RUN: TSAN_OPTIONS=detect_deadlocks=1 not %run %t 2>&1 | FileCheck %s
 // RUN: TSAN_OPTIONS=detect_deadlocks=0     %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
-// RUN: echo "deadlock:main" > %t.sup
-// RUN: TSAN_OPTIONS="suppressions=%t.sup" %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
-// RUN: echo "deadlock:zzzz" > %t.sup
-// RUN: TSAN_OPTIONS="suppressions=%t.sup" not %run %t 2>&1 | FileCheck %s
+// RUN: echo "deadlock:main" > %t.supp
+// RUN: TSAN_OPTIONS="suppressions='%t.supp'" %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: echo "deadlock:zzzz" > %t.supp
+// RUN: TSAN_OPTIONS="suppressions='%t.supp'" not %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 
diff --git a/test/tsan/mutexset1.cc b/test/tsan/mutexset1.cc
index 72964ed..407cfe5 100644
--- a/test/tsan/mutexset1.cc
+++ b/test/tsan/mutexset1.cc
@@ -1,13 +1,11 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t mtx;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(&mtx);
   Global++;
   pthread_mutex_unlock(&mtx);
@@ -16,10 +14,12 @@
 
 void *Thread2(void *x) {
   Global--;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK:   Write of size 4 at {{.*}} by thread T1
   // CHECK:                         (mutexes: write [[M1:M[0-9]+]]):
diff --git a/test/tsan/mutexset2.cc b/test/tsan/mutexset2.cc
index 01a5f5d..2a3e5bb 100644
--- a/test/tsan/mutexset2.cc
+++ b/test/tsan/mutexset2.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t mtx;
@@ -10,16 +8,18 @@
   pthread_mutex_lock(&mtx);
   Global++;
   pthread_mutex_unlock(&mtx);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global--;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK:   Write of size 4 at {{.*}} by thread T2:
   // CHECK:   Previous write of size 4 at {{.*}} by thread T1
diff --git a/test/tsan/mutexset3.cc b/test/tsan/mutexset3.cc
index e14bb11..ce64cf8 100644
--- a/test/tsan/mutexset3.cc
+++ b/test/tsan/mutexset3.cc
@@ -1,14 +1,12 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t mtx1;
 pthread_mutex_t mtx2;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(&mtx1);
   pthread_mutex_lock(&mtx2);
   Global++;
@@ -19,10 +17,12 @@
 
 void *Thread2(void *x) {
   Global--;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK: Write of size 4 at {{.*}} by thread T1
   // CHECK:               (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
diff --git a/test/tsan/mutexset4.cc b/test/tsan/mutexset4.cc
index db860e0..b961efd 100644
--- a/test/tsan/mutexset4.cc
+++ b/test/tsan/mutexset4.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t mtx1;
@@ -13,16 +11,18 @@
   Global++;
   pthread_mutex_unlock(&mtx2);
   pthread_mutex_unlock(&mtx1);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global--;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK:   Write of size 4 at {{.*}} by thread T2:
   // CHECK:   Previous write of size 4 at {{.*}} by thread T1
diff --git a/test/tsan/mutexset5.cc b/test/tsan/mutexset5.cc
index e1cc2fc..8ef9af0 100644
--- a/test/tsan/mutexset5.cc
+++ b/test/tsan/mutexset5.cc
@@ -1,14 +1,12 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t mtx1;
 pthread_mutex_t mtx2;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(&mtx1);
   Global++;
   pthread_mutex_unlock(&mtx1);
@@ -19,10 +17,12 @@
   pthread_mutex_lock(&mtx2);
   Global--;
   pthread_mutex_unlock(&mtx2);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK:   Write of size 4 at {{.*}} by thread T1
   // CHECK:                              (mutexes: write [[M1:M[0-9]+]]):
diff --git a/test/tsan/mutexset6.cc b/test/tsan/mutexset6.cc
index 07dcc0a..f4251db 100644
--- a/test/tsan/mutexset6.cc
+++ b/test/tsan/mutexset6.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t mtx1;
@@ -9,7 +7,7 @@
 pthread_rwlock_t mtx3;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(&mtx1);
   Global++;
   pthread_mutex_unlock(&mtx1);
@@ -24,10 +22,12 @@
   Global--;
   pthread_spin_unlock(&mtx2);
   pthread_rwlock_unlock(&mtx3);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK:   Write of size 4 at {{.*}} by thread T1
   // CHECK:                          (mutexes: write [[M1:M[0-9]+]]):
diff --git a/test/tsan/mutexset7.cc b/test/tsan/mutexset7.cc
index 1217484..d3a221d 100644
--- a/test/tsan/mutexset7.cc
+++ b/test/tsan/mutexset7.cc
@@ -1,13 +1,11 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 __thread int huge[1024*1024];
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global++;
   return NULL;
 }
@@ -20,10 +18,12 @@
   pthread_mutex_unlock(mtx);
   pthread_mutex_destroy(mtx);
   delete mtx;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/mutexset8.cc b/test/tsan/mutexset8.cc
index 3e1ab8c..40d5d04 100644
--- a/test/tsan/mutexset8.cc
+++ b/test/tsan/mutexset8.cc
@@ -1,13 +1,11 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 pthread_mutex_t *mtx;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(mtx);
   Global++;
   pthread_mutex_unlock(mtx);
@@ -16,10 +14,12 @@
 
 void *Thread2(void *x) {
   Global--;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   // CHECK: WARNING: ThreadSanitizer: data race
   // CHECK:   Write of size 4 at {{.*}} by thread T1
   // CHECK:                         (mutexes: write [[M1:M[0-9]+]]):
diff --git a/test/tsan/pthread_atfork_deadlock.c b/test/tsan/pthread_atfork_deadlock.c
index 0f33b90..4aeec82 100644
--- a/test/tsan/pthread_atfork_deadlock.c
+++ b/test/tsan/pthread_atfork_deadlock.c
@@ -4,14 +4,12 @@
 // When the data race was reported, pthread_atfork() handler used to be
 // executed which caused another race report in the same thread, which resulted
 // in a deadlock.
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int glob = 0;
 
 void *worker(void *unused) {
-  sleep(1);
+  barrier_wait(&barrier);
   glob++;
   return NULL;
 }
@@ -22,10 +20,12 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_atfork(atfork, NULL, NULL);
   pthread_t t;
   pthread_create(&t, NULL, worker, NULL);
   glob++;
+  barrier_wait(&barrier);
   pthread_join(t, NULL);
   // CHECK: ThreadSanitizer: data race
   // CHECK-NOT: ATFORK
diff --git a/test/tsan/race_on_barrier.c b/test/tsan/race_on_barrier.c
index 99b18fe..cf8a4cb 100644
--- a/test/tsan/race_on_barrier.c
+++ b/test/tsan/race_on_barrier.c
@@ -1,25 +1,24 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 pthread_barrier_t B;
 int Global;
 
 void *Thread1(void *x) {
   pthread_barrier_init(&B, 0, 2);
+  barrier_wait(&barrier);
   pthread_barrier_wait(&B);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_barrier_wait(&B);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, NULL, Thread1, NULL);
   Thread2(0);
diff --git a/test/tsan/race_on_mutex.c b/test/tsan/race_on_mutex.c
index b4adeeb..7bd461b 100644
--- a/test/tsan/race_on_mutex.c
+++ b/test/tsan/race_on_mutex.c
@@ -1,8 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 pthread_mutex_t Mtx;
 int Global;
@@ -12,11 +9,12 @@
   pthread_mutex_lock(&Mtx);
   Global = 42;
   pthread_mutex_unlock(&Mtx);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_lock(&Mtx);
   Global = 43;
   pthread_mutex_unlock(&Mtx);
@@ -24,6 +22,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
@@ -36,7 +35,7 @@
 // CHECK:      WARNING: ThreadSanitizer: data race
 // CHECK-NEXT:   Atomic read of size 1 at {{.*}} by thread T2:
 // CHECK-NEXT:     #0 pthread_mutex_lock
-// CHECK-NEXT:     #1 Thread2{{.*}} {{.*}}race_on_mutex.c:20{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #1 Thread2{{.*}} {{.*}}race_on_mutex.c:18{{(:3)?}} ({{.*}})
 // CHECK:        Previous write of size 1 at {{.*}} by thread T1:
 // CHECK-NEXT:     #0 pthread_mutex_init {{.*}} ({{.*}})
-// CHECK-NEXT:     #1 Thread1{{.*}} {{.*}}race_on_mutex.c:11{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #1 Thread1{{.*}} {{.*}}race_on_mutex.c:8{{(:3)?}} ({{.*}})
diff --git a/test/tsan/race_on_mutex2.c b/test/tsan/race_on_mutex2.c
index 1796d0c..6ee5438 100644
--- a/test/tsan/race_on_mutex2.c
+++ b/test/tsan/race_on_mutex2.c
@@ -1,21 +1,20 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *x) {
   pthread_mutex_lock((pthread_mutex_t*)x);
   pthread_mutex_unlock((pthread_mutex_t*)x);
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_mutex_t Mtx;
   pthread_mutex_init(&Mtx, 0);
   pthread_t t;
   pthread_create(&t, 0, Thread, &Mtx);
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_mutex_destroy(&Mtx);
   pthread_join(t, 0);
   return 0;
diff --git a/test/tsan/race_on_puts.cc b/test/tsan/race_on_puts.cc
index 1f2b4db..f254182 100644
--- a/test/tsan/race_on_puts.cc
+++ b/test/tsan/race_on_puts.cc
@@ -1,21 +1,22 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 char s[] = "abracadabra";
 
 void *Thread0(void *p) {
   puts(s);
+  barrier_wait(&barrier);
   return 0;
 }
 
 void *Thread1(void *p) {
+  barrier_wait(&barrier);
   s[3] = 'z';
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t th[2];
   pthread_create(&th[0], 0, Thread0, 0);
   pthread_create(&th[1], 0, Thread1, 0);
diff --git a/test/tsan/race_on_read.cc b/test/tsan/race_on_read.cc
index 1ec0522..d388bba 100644
--- a/test/tsan/race_on_read.cc
+++ b/test/tsan/race_on_read.cc
@@ -1,31 +1,35 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
-#include <unistd.h>
 #include <errno.h>
 
 int fd;
 char buf;
 
-void *Thread(void *x) {
-  sleep(1);
+void *Thread1(void *x) {
+  barrier_wait(&barrier);
   read(fd, &buf, 1);
   return NULL;
 }
 
+void *Thread2(void *x) {
+  read(fd, &buf, 1);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
 int main() {
+  barrier_init(&barrier, 2);
   fd = open("/dev/random", O_RDONLY);
   if (fd < 0) {
     fprintf(stderr, "failed to open /dev/random (%d)\n", errno);
     return 1;
   }
   pthread_t t[2];
-  pthread_create(&t[0], NULL, Thread, NULL);
-  pthread_create(&t[1], NULL, Thread, NULL);
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
   close(fd);
diff --git a/test/tsan/race_on_speculative_load.cc b/test/tsan/race_on_speculative_load.cc
index f816db9..b50b696 100644
--- a/test/tsan/race_on_speculative_load.cc
+++ b/test/tsan/race_on_speculative_load.cc
@@ -1,9 +1,8 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t | FileCheck %s
 // Regtest for https://code.google.com/p/thread-sanitizer/issues/detail?id=40
 // This is a correct program and tsan should not report a race.
-#include <pthread.h>
-#include <unistd.h>
-#include <stdio.h>
+#include "test.h"
+
 int g;
 __attribute__((noinline))
 int foo(int cond) {
@@ -11,17 +10,21 @@
     return g;
   return 0;
 }
+
 void *Thread1(void *p) {
+  barrier_wait(&barrier);
   long res = foo((long)p);
-  sleep(1);
   return (void*) res;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread1, 0);
   g = 1;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   printf("PASS\n");
+  // CHECK-NOT: ThreadSanitizer: data race
   // CHECK: PASS
 }
diff --git a/test/tsan/race_on_write.cc b/test/tsan/race_on_write.cc
index 484bbb7..147591a 100644
--- a/test/tsan/race_on_write.cc
+++ b/test/tsan/race_on_write.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -11,7 +9,7 @@
 
 void *Thread1(void *x) {
   buf = 1;
-  sleep(1);
+  barrier_wait(&barrier);
   return NULL;
 }
 
@@ -21,11 +19,12 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   fd = open("/dev/null", O_WRONLY);
   if (fd < 0) return 1;
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
diff --git a/test/tsan/race_with_finished_thread.cc b/test/tsan/race_with_finished_thread.cc
index d287600..755a7bd 100644
--- a/test/tsan/race_with_finished_thread.cc
+++ b/test/tsan/race_with_finished_thread.cc
@@ -1,9 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
+#include "test.h"
 
 // Ensure that we can restore a stack of a finished thread.
 
@@ -15,16 +11,19 @@
 
 void *Thread1(void *x) {
   foobar(&g_data);
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
+  sleep(1); // give Thread1 time to finish and exit
   g_data = 43;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/real_deadlock_detector_stress_test.cc b/test/tsan/real_deadlock_detector_stress_test.cc
new file mode 100644
index 0000000..67c878f
--- /dev/null
+++ b/test/tsan/real_deadlock_detector_stress_test.cc
@@ -0,0 +1,186 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+#include <vector>
+#include <algorithm>
+
+const int kThreads = 4;
+const int kMutexes = 16 << 10;
+const int kIters = 400 << 10;
+const int kMaxPerThread = 10;
+
+const int kStateInited = 0;
+const int kStateNotInited = -1;
+const int kStateLocked = -2;
+
+struct Mutex {
+  int state;
+  pthread_rwlock_t m;
+};
+
+Mutex mtx[kMutexes];
+
+void check(int res) {
+  if (res != 0) {
+    printf("SOMETHING HAS FAILED\n");
+    exit(1);
+  }
+}
+
+bool cas(int *a, int oldval, int newval) {
+  return __atomic_compare_exchange_n(a, &oldval, newval, false,
+      __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
+}
+
+void *Thread(void *seed) {
+  unsigned rnd = (unsigned)(unsigned long)seed;
+  int err;
+  std::vector<int> locked;
+  for (int i = 0; i < kIters; i++) {
+    int what = rand_r(&rnd) % 10;
+    if (what < 4 && locked.size() < kMaxPerThread) {
+      // lock
+      int max_locked = -1;
+      if (!locked.empty()) {
+        max_locked = *std::max_element(locked.begin(), locked.end());
+        if (max_locked == kMutexes - 1) {
+          i--;
+          continue;
+        }
+      }
+      int id = (rand_r(&rnd) % (kMutexes - max_locked - 1)) + max_locked + 1;
+      Mutex *m = &mtx[id];
+      // init the mutex if necessary or acquire a reference
+      for (;;) {
+        int old = __atomic_load_n(&m->state, __ATOMIC_RELAXED);
+        if (old == kStateLocked) {
+          pthread_yield();
+          continue;
+        }
+        int newv = old + 1;
+        if (old == kStateNotInited)
+          newv = kStateLocked;
+        if (cas(&m->state, old, newv)) {
+          if (old == kStateNotInited) {
+            if ((err = pthread_rwlock_init(&m->m, 0))) {
+              fprintf(stderr, "pthread_rwlock_init failed with %d\n", err);
+              exit(1);
+            }
+            if (!cas(&m->state, kStateLocked, 1)) {
+              fprintf(stderr, "init commit failed\n");
+              exit(1);
+            }
+          }
+          break;
+        }
+      }
+      // now we have an inited and referenced mutex, choose what to do
+      bool failed = false;
+      switch (rand_r(&rnd) % 4) {
+      case 0:
+        if ((err = pthread_rwlock_wrlock(&m->m))) {
+          fprintf(stderr, "pthread_rwlock_wrlock failed with %d\n", err);
+          exit(1);
+        }
+        break;
+      case 1:
+        if ((err = pthread_rwlock_rdlock(&m->m))) {
+          fprintf(stderr, "pthread_rwlock_rdlock failed with %d\n", err);
+          exit(1);
+        }
+        break;
+      case 2:
+        err = pthread_rwlock_trywrlock(&m->m);
+        if (err != 0 && err != EBUSY) {
+          fprintf(stderr, "pthread_rwlock_trywrlock failed with %d\n", err);
+          exit(1);
+        }
+        failed = err == EBUSY;
+        break;
+      case 3:
+        err = pthread_rwlock_tryrdlock(&m->m);
+        if (err != 0 && err != EBUSY) {
+          fprintf(stderr, "pthread_rwlock_tryrdlock failed with %d\n", err);
+          exit(1);
+        }
+        failed = err == EBUSY;
+        break;
+      }
+      if (failed) {
+        if (__atomic_fetch_sub(&m->state, 1, __ATOMIC_ACQ_REL) <= 0) {
+          fprintf(stderr, "failed to unref after failed trylock\n");
+          exit(1);
+        }
+        continue;
+      }
+      locked.push_back(id);
+    } else if (what < 9 && !locked.empty()) {
+      // unlock
+      int pos = rand_r(&rnd) % locked.size();
+      int id = locked[pos];
+      locked[pos] = locked[locked.size() - 1];
+      locked.pop_back();
+      Mutex *m = &mtx[id];
+      if ((err = pthread_rwlock_unlock(&m->m))) {
+        fprintf(stderr, "pthread_rwlock_unlock failed with %d\n", err);
+        exit(1);
+      }
+      if (__atomic_fetch_sub(&m->state, 1, __ATOMIC_ACQ_REL) <= 0) {
+        fprintf(stderr, "failed to unref after unlock\n");
+        exit(1);
+      }
+    } else {
+      // Destroy a random mutex.
+      int id = rand_r(&rnd) % kMutexes;
+      Mutex *m = &mtx[id];
+      if (!cas(&m->state, kStateInited, kStateLocked)) {
+        i--;
+        continue;
+      }
+      if ((err = pthread_rwlock_destroy(&m->m))) {
+        fprintf(stderr, "pthread_rwlock_destroy failed with %d\n", err);
+        exit(1);
+      }
+      if (!cas(&m->state, kStateLocked, kStateNotInited)) {
+        fprintf(stderr, "destroy commit failed\n");
+        exit(1);
+      }
+    }
+  }
+  // Unlock all previously locked mutexes, otherwise other threads can deadlock.
+  for (int i = 0; i < locked.size(); i++) {
+    int id = locked[i];
+    Mutex *m = &mtx[id];
+    if ((err = pthread_rwlock_unlock(&m->m))) {
+      fprintf(stderr, "pthread_rwlock_unlock failed with %d\n", err);
+      exit(1);
+    }
+  }
+  return 0;
+}
+
+int main() {  // seeds the PRNG, starts kThreads workers and joins them
+  timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  unsigned s = (unsigned)ts.tv_nsec;
+  fprintf(stderr, "seed %u\n", s);  // s is unsigned, so print it with %u
+  srand(s);
+  for (int i = 0; i < kMutexes; i++)
+    mtx[i].state = kStateNotInited;  // rwlocks are initialized lazily by Thread
+  pthread_t t[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&t[i], 0, Thread, (void*)(unsigned long)rand());
+  for (int i = 0; i < kThreads; i++)
+    pthread_join(t[i], 0);
+  fprintf(stderr, "DONE\n");  // matched by the CHECK directive below
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer
+// CHECK: DONE
+
diff --git a/test/tsan/restore_stack.cc b/test/tsan/restore_stack.cc
new file mode 100644
index 0000000..39c1101
--- /dev/null
+++ b/test/tsan/restore_stack.cc
@@ -0,0 +1,50 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+int Global;
+volatile int x;
+const int kSize = 64 << 10;
+volatile long data[kSize];
+
+void __attribute__((noinline)) foo() {
+  for (int i = 0; i < kSize; i++)
+    data[i]++;
+}
+
+void *Thread(void *a) {
+  __atomic_store_n(&x, 1, __ATOMIC_RELEASE);
+  foo();
+  data[0]++;
+  if (a != 0)
+    barrier_wait(&barrier);
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  for (int i = 0; i < 50; i++) {
+    pthread_t t;
+    pthread_create(&t, 0, Thread, 0);
+    pthread_join(t, 0);
+  }
+  pthread_t t;
+  pthread_create(&t, 0, Thread, (void*)1);
+  barrier_wait(&barrier);
+  for (int i = 0; i < kSize; i++)
+    data[i]++;
+  pthread_join(t, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// Previously this test produced bogus stack traces like:
+//   Previous write of size 8 at 0x0000006a8ff8 by thread T17:
+//     #0 foo() restore_stack.cc:13:5 (restore_stack.cc.exe+0x00000040622c)
+//     #1 Thread(void*) restore_stack.cc:18:3 (restore_stack.cc.exe+0x000000406283)
+//     #2 __tsan_thread_start_func rtl/tsan_interceptors.cc:886 (restore_stack.cc.exe+0x00000040a749)
+//     #3 Thread(void*) restore_stack.cc:18:3 (restore_stack.cc.exe+0x000000406283)
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: __tsan_thread_start_func
+// CHECK-NOT: #3 Thread
+// CHECK: DONE
diff --git a/test/tsan/signal_errno.cc b/test/tsan/signal_errno.cc
index 1fa20f3..8305e84 100644
--- a/test/tsan/signal_errno.cc
+++ b/test/tsan/signal_errno.cc
@@ -1,10 +1,7 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include "test.h"
 #include <signal.h>
 #include <sys/types.h>
-#include <unistd.h>
 #include <errno.h>
 
 pthread_t mainth;
@@ -16,12 +13,13 @@
 }
 
 static void* sendsignal(void *p) {
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_kill(mainth, SIGPROF);
   return 0;
 }
 
 static __attribute__((noinline)) void loop() {
+  barrier_wait(&barrier);
   while (done == 0) {
     volatile char *p = (char*)malloc(1);
     p[0] = 0;
@@ -31,6 +29,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   mainth = pthread_self();
   struct sigaction act = {};
   act.sa_sigaction = &MyHandler;
diff --git a/test/tsan/signal_malloc.cc b/test/tsan/signal_malloc.cc
index 06932fb..1dccb13 100644
--- a/test/tsan/signal_malloc.cc
+++ b/test/tsan/signal_malloc.cc
@@ -1,9 +1,7 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <stdio.h>
-#include <stdlib.h>
+#include "test.h"
 #include <signal.h>
 #include <sys/types.h>
-#include <unistd.h>
 
 static void handler(int, siginfo_t*, void*) {
   // CHECK: WARNING: ThreadSanitizer: signal-unsafe call inside of a signal
@@ -20,7 +18,7 @@
   act.sa_sigaction = &handler;
   sigaction(SIGPROF, &act, 0);
   kill(getpid(), SIGPROF);
-  sleep(1);
+  sleep(1);  // let the signal handler run
   return 0;
 }
 
diff --git a/test/tsan/signal_recursive.cc b/test/tsan/signal_recursive.cc
index d92ba97..825338d 100644
--- a/test/tsan/signal_recursive.cc
+++ b/test/tsan/signal_recursive.cc
@@ -3,17 +3,13 @@
 // Test case for recursive signal handlers, adopted from:
 // https://code.google.com/p/thread-sanitizer/issues/detail?id=71
 
-#include <pthread.h>
+#include "test.h"
 #include <semaphore.h>
 #include <signal.h>
-#include <unistd.h>
 #include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
 
 static const int kSigSuspend = SIGUSR1;
 static const int kSigRestart = SIGUSR2;
-static sigset_t g_suspend_handler_mask;
 
 static sem_t g_thread_suspend_ack_sem;
 
@@ -25,7 +21,7 @@
   // Mono walks thread stacks to detect unreferenced objects.
   // If last object reference is kept in register the object will be collected
   // This is why threads can't be suspended with something like pthread_suspend
-};
+}
 
 static void fail(const char *what) {
   fprintf(stderr, "FAILED: %s (errno=%d)\n", what, errno);
@@ -35,15 +31,19 @@
 static void SuspendHandler(int sig) {
   int old_errno = errno;
   SaveRegistersInStack();
+
+  // Enable kSigRestart handling, tsan disables signals around signal handlers.
+  sigset_t sigset;
+  sigemptyset(&sigset);
+  pthread_sigmask(SIG_SETMASK, &sigset, 0);
+
   // Acknowledge that thread is saved and suspended
   if (sem_post(&g_thread_suspend_ack_sem) != 0)
     fail("sem_post failed");
 
-  do {
-    g_busy_thread_received_restart = false;
-    if (sigsuspend(&g_suspend_handler_mask) != -1 || errno != EINTR)
-      fail("sigsuspend failed");
-  } while (!g_busy_thread_received_restart);
+  // Wait for wakeup signal.
+  while (!g_busy_thread_received_restart)
+    usleep(100);  // wait for kSigRestart signal
 
   // Acknowledge that thread restarted
   if (sem_post(&g_thread_suspend_ack_sem) != 0)
@@ -59,46 +59,37 @@
 }
 
 static void StopWorld(pthread_t thread) {
-  int result = pthread_kill(thread, kSigSuspend);
-  if (result != 0)
+  if (pthread_kill(thread, kSigSuspend) != 0)
     fail("pthread_kill failed");
 
-  while ((result = sem_wait(&g_thread_suspend_ack_sem)) != 0) {
-    if (result != EINTR) {
+  while (sem_wait(&g_thread_suspend_ack_sem) != 0) {
+    if (errno != EINTR)
       fail("sem_wait failed");
-    }
   }
 }
 
 static void StartWorld(pthread_t thread) {
-  int result = pthread_kill(thread, kSigRestart);
-  if (result != 0)
+  if (pthread_kill(thread, kSigRestart) != 0)
     fail("pthread_kill failed");
 
-  while ((result = sem_wait(&g_thread_suspend_ack_sem)) != 0) {
-    if (result != EINTR) {
+  while (sem_wait(&g_thread_suspend_ack_sem) != 0) {
+    if (errno != EINTR)
       fail("sem_wait failed");
-    }
   }
 }
 
 static void CollectGarbage(pthread_t thread) {
   StopWorld(thread);
   // Walk stacks
-    StartWorld(thread);
+  StartWorld(thread);
 }
 
 static void Init() {
-  if (sigfillset(&g_suspend_handler_mask) != 0)
-    fail("sigfillset failed");
-  if (sigdelset(&g_suspend_handler_mask, kSigRestart) != 0)
-    fail("sigdelset failed");
   if (sem_init(&g_thread_suspend_ack_sem, 0, 0) != 0)
     fail("sem_init failed");
 
   struct sigaction act = {};
   act.sa_flags = SA_RESTART;
-  sigfillset(&act.sa_mask);
   act.sa_handler = &SuspendHandler;
   if (sigaction(kSigSuspend, &act, NULL) != 0)
     fail("sigaction failed");
@@ -118,9 +109,11 @@
 int main(int argc, const char *argv[]) {
   Init();
   pthread_t busy_thread;
-  pthread_create(&busy_thread, NULL, &BusyThread, NULL);
+  if (pthread_create(&busy_thread, NULL, &BusyThread, NULL) != 0)
+    fail("pthread_create failed");
   CollectGarbage(busy_thread);
-  pthread_join(busy_thread, 0);
+  if (pthread_join(busy_thread, 0) != 0)
+    fail("pthread_join failed");
   fprintf(stderr, "DONE\n");
   return 0;
 }
diff --git a/test/tsan/signal_reset.cc b/test/tsan/signal_reset.cc
new file mode 100644
index 0000000..aec98dc
--- /dev/null
+++ b/test/tsan/signal_reset.cc
@@ -0,0 +1,74 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+
+volatile int X;
+int stop;
+
+static void handler(int sig) {
+  (void)sig;
+  if (X != 0)
+    printf("bad");
+}
+
+static void* busy(void *p) {
+  while (__atomic_load_n(&stop, __ATOMIC_RELAXED) == 0) {
+  }
+  return 0;
+}
+
+static void* reset(void *p) {
+  struct sigaction act = {};
+  for (int i = 0; i < 1000000; i++) {
+    act.sa_handler = &handler;
+    if (sigaction(SIGPROF, &act, 0)) {
+      perror("sigaction");
+      exit(1);
+    }
+    act.sa_handler = SIG_IGN;
+    if (sigaction(SIGPROF, &act, 0)) {
+      perror("sigaction");
+      exit(1);
+    }
+  }
+  return 0;
+}
+
+int main() {
+  struct sigaction act = {};
+  act.sa_handler = SIG_IGN;
+  if (sigaction(SIGPROF, &act, 0)) {
+    perror("sigaction");
+    exit(1);
+  }
+
+  itimerval t;
+  t.it_value.tv_sec = 0;
+  t.it_value.tv_usec = 10;
+  t.it_interval = t.it_value;
+  if (setitimer(ITIMER_PROF, &t, 0)) {
+    perror("setitimer");
+    exit(1);
+  }
+
+  pthread_t th[2];
+  pthread_create(&th[0], 0, busy, 0);
+  pthread_create(&th[1], 0, reset, 0);
+
+  pthread_join(th[1], 0);
+  __atomic_store_n(&stop, 1, __ATOMIC_RELAXED);
+  pthread_join(th[0], 0);
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: DONE
+// CHECK-NOT: WARNING: ThreadSanitizer:
diff --git a/test/tsan/signal_segv_handler.cc b/test/tsan/signal_segv_handler.cc
new file mode 100644
index 0000000..2d806ee
--- /dev/null
+++ b/test/tsan/signal_segv_handler.cc
@@ -0,0 +1,39 @@
+// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="flush_memory_ms=1 memory_limit_mb=1" %run %t 2>&1 | FileCheck %s
+
+// JVM uses SEGV to preempt threads. All threads do a load from a known address
+// periodically. When runtime needs to preempt threads, it unmaps the page.
+// Threads start triggering SEGV one by one. The signal handler blocks
+// threads while runtime does its thing. Then runtime maps the page again
+// and resumes the threads.
+// Previously this pattern conflicted with stop-the-world machinery,
+// because it briefly reset SEGV handler to SIG_DFL.
+// As a consequence, the JVM just silently died.
+
+// This test sets memory flushing rate to maximum, then does a series of
+// "benign" SEGVs that are handled by signal handler, and ensures that
+// the process survives.
+
+#include "test.h"
+#include <signal.h>
+#include <sys/mman.h>
+
+void *guard;
+
+void handler(int signo, siginfo_t *info, void *uctx) {
+  mprotect(guard, 4096, PROT_READ | PROT_WRITE);
+}
+
+int main() {  // faults on the guard page 1M times; handler() re-enables access
+  struct sigaction a = {};  // zero-init so sa_mask is not indeterminate
+  a.sa_sigaction = handler;
+  a.sa_flags = SA_SIGINFO;
+  sigaction(SIGSEGV, &a, 0);
+  guard = mmap(0, 4096, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  for (int i = 0; i < 1000000; i++) {
+    mprotect(guard, 4096, PROT_NONE);  // re-arm the guard page
+    *(int*)guard = 1;  // raises SIGSEGV; the store retries after handler()
+  }
+  fprintf(stderr, "DONE\n");  // reached only if the process survived
+}
+
+// CHECK: DONE
diff --git a/test/tsan/signal_sync.cc b/test/tsan/signal_sync.cc
index 15387b7..6ff19d3 100644
--- a/test/tsan/signal_sync.cc
+++ b/test/tsan/signal_sync.cc
@@ -1,11 +1,8 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include "test.h"
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/time.h>
-#include <unistd.h>
 #include <errno.h>
 
 volatile int X;
@@ -18,7 +15,7 @@
 
 static void* thr(void *p) {
   for (int i = 0; i != 1000; i++)
-    usleep(1000);
+    usleep(1000);  // process signals
   return 0;
 }
 
diff --git a/test/tsan/signal_thread.cc b/test/tsan/signal_thread.cc
new file mode 100644
index 0000000..8eda80a
--- /dev/null
+++ b/test/tsan/signal_thread.cc
@@ -0,0 +1,52 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+
+volatile int X;
+
+static void handler(int sig) {
+  (void)sig;
+  if (X != 0)
+    printf("bad");
+}
+
+static void* thr(void *p) {
+  return 0;
+}
+
+int main() {
+  struct sigaction act = {};
+  act.sa_handler = &handler;
+  if (sigaction(SIGPROF, &act, 0)) {
+    perror("sigaction");
+    exit(1);
+  }
+
+  itimerval t;
+  t.it_value.tv_sec = 0;
+  t.it_value.tv_usec = 10;
+  t.it_interval = t.it_value;
+  if (setitimer(ITIMER_PROF, &t, 0)) {
+    perror("setitimer");
+    exit(1);
+  }
+
+  for (int i = 0; i < 10000; i++) {
+    pthread_t th;
+    pthread_create(&th, 0, thr, 0);
+    pthread_join(th, 0);
+  }
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: DONE
+// CHECK-NOT: WARNING: ThreadSanitizer:
diff --git a/test/tsan/signal_write.cc b/test/tsan/signal_write.cc
index 626d87a..edb3d23 100644
--- a/test/tsan/signal_write.cc
+++ b/test/tsan/signal_write.cc
@@ -16,7 +16,7 @@
   act.sa_sigaction = &handler;
   sigaction(SIGPROF, &act, 0);
   kill(getpid(), SIGPROF);
-  sleep(1);
+  sleep(1);  // let the signal handler run, can't use barrier in sig handler
   fprintf(stderr, "DONE\n");
   return 0;
 }
diff --git a/test/tsan/simple_race.c b/test/tsan/simple_race.c
index 7b60c5e..b4234ac 100644
--- a/test/tsan/simple_race.c
+++ b/test/tsan/simple_race.c
@@ -1,22 +1,22 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global = 42;
   return NULL;
 }
 
 void *Thread2(void *x) {
   Global = 43;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/simple_race.cc b/test/tsan/simple_race.cc
index 0236b9f..612ce2d 100644
--- a/test/tsan/simple_race.cc
+++ b/test/tsan/simple_race.cc
@@ -1,25 +1,28 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
+  barrier_wait(&barrier);
   Global++;
   return NULL;
 }
 
 void *Thread2(void *x) {
   Global--;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
+  return 0;
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/simple_stack.c b/test/tsan/simple_stack.c
index 8736703..6ef92fb 100644
--- a/test/tsan/simple_stack.c
+++ b/test/tsan/simple_stack.c
@@ -1,7 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
@@ -24,13 +22,14 @@
 }
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   bar1();
   return NULL;
 }
 
 void *Thread2(void *x) {
   bar2();
+  barrier_wait(&barrier);
   return NULL;
 }
 
@@ -39,6 +38,7 @@
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   StartThread(&t[0], Thread1);
   StartThread(&t[1], Thread2);
@@ -49,18 +49,18 @@
 
 // CHECK:      WARNING: ThreadSanitizer: data race
 // CHECK-NEXT:   Write of size 4 at {{.*}} by thread T1:
-// CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack.c:9{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack.c:14{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack.c:28{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack.c:7{{(:10)?}} ({{.*}})
+// CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack.c:12{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack.c:26{{(:3)?}} ({{.*}})
 // CHECK:        Previous read of size 4 at {{.*}} by thread T2:
-// CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:18{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack.c:23{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #2 Thread2{{.*}} {{.*}}simple_stack.c:33{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:16{{(:20)?}} ({{.*}})
+// CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack.c:21{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #2 Thread2{{.*}} {{.*}}simple_stack.c:31{{(:3)?}} ({{.*}})
 // CHECK:        Thread T1 (tid={{.*}}, running) created by main thread at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
-// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:38{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:37{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:43{{(:3)?}} ({{.*}})
 // CHECK:        Thread T2 ({{.*}}) created by main thread at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
-// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:38{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:37{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:44{{(:3)?}} ({{.*}})
diff --git a/test/tsan/simple_stack2.cc b/test/tsan/simple_stack2.cc
index b07d863..20ef729 100644
--- a/test/tsan/simple_stack2.cc
+++ b/test/tsan/simple_stack2.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %T/simple_stack2.cc.exe && %deflake %run %T/simple_stack2.cc.exe | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
@@ -30,24 +28,26 @@
 }
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   bar1();
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, NULL, Thread1, NULL);
   bar2();
+  barrier_wait(&barrier);
   pthread_join(t, NULL);
 }
 
 // CHECK:      WARNING: ThreadSanitizer: data race
 // CHECK-NEXT:   Write of size 4 at {{.*}} by thread T1:
-// CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack2.cc:9{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
-// CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack2.cc:16{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
-// CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack2.cc:34{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
+// CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack2.cc:7{{(:10)?}} (simple_stack2.cc.exe+{{.*}})
+// CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack2.cc:14{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
+// CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack2.cc:32{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
 // CHECK:        Previous read of size 4 at {{.*}} by main thread:
-// CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack2.cc:20{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
-// CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack2.cc:29{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
-// CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack2.cc:41{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
+// CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack2.cc:18{{(:22)?}} (simple_stack2.cc.exe+{{.*}})
+// CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack2.cc:27{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
+// CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack2.cc:40{{(:3)?}} (simple_stack2.cc.exe+{{.*}})
diff --git a/test/tsan/sleep_sync.cc b/test/tsan/sleep_sync.cc
index c7614e1..b2c6a12 100644
--- a/test/tsan/sleep_sync.cc
+++ b/test/tsan/sleep_sync.cc
@@ -1,23 +1,25 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 int X = 0;
 
 void MySleep() {
-  sleep(1);
+  sleep(1);  // the sleep that must appear in the report
 }
 
 void *Thread(void *p) {
+  barrier_wait(&barrier);
   MySleep();  // Assume the main thread has done the write.
   X = 42;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   X = 43;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   return 0;
 }
diff --git a/test/tsan/sleep_sync2.cc b/test/tsan/sleep_sync2.cc
index 4e61699..a1a7a3a 100644
--- a/test/tsan/sleep_sync2.cc
+++ b/test/tsan/sleep_sync2.cc
@@ -1,18 +1,20 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 int X = 0;
 
 void *Thread(void *p) {
   X = 42;
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
-  sleep(1);
+  sleep(1);  // must not appear in the report
   pthread_create(&t, 0, Thread, 0);
+  barrier_wait(&barrier);
   X = 43;
   pthread_join(t, 0);
   return 0;
diff --git a/test/tsan/stack_race.cc b/test/tsan/stack_race.cc
index 2e02f46..1ada295 100644
--- a/test/tsan/stack_race.cc
+++ b/test/tsan/stack_race.cc
@@ -1,19 +1,19 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)a = 43;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   int Var = 42;
   pthread_t t;
   pthread_create(&t, 0, Thread, &Var);
   Var = 43;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/stack_race2.cc b/test/tsan/stack_race2.cc
index 818db36..00e31fb 100644
--- a/test/tsan/stack_race2.cc
+++ b/test/tsan/stack_race2.cc
@@ -1,10 +1,8 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread2(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)a = 43;
   return 0;
 }
@@ -14,11 +12,13 @@
   pthread_t t;
   pthread_create(&t, 0, Thread2, &Var);
   Var = 42;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   pthread_join(t, 0);
diff --git a/test/tsan/stack_sync_reuse.cc b/test/tsan/stack_sync_reuse.cc
new file mode 100644
index 0000000..5ea9e84
--- /dev/null
+++ b/test/tsan/stack_sync_reuse.cc
@@ -0,0 +1,65 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+// Test case https://code.google.com/p/thread-sanitizer/issues/detail?id=87
+// Tsan sees false HB edge on address pointed to by syncp variable.
+// It is false because when acquire is done syncp points to a var in one frame,
+// and during release it points to a var in a different frame.
+// The code is somewhat tricky because it prevents compiler from optimizing
+// our accesses away, structured to not introduce other data races and
+// not introduce other synchronization, and to arrange the vars in different
+// frames to occupy the same address.
+
+// The data race CHECK-NOT below actually must be CHECK, because the program
+// does contain the data race on global.
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
+
+long global;
+long *syncp;
+long *addr;
+long sink;
+
+void *Thread(void *x) {
+  while (__atomic_load_n(&syncp, __ATOMIC_ACQUIRE) == 0)
+    usleep(1000);  // spin wait
+  global = 42;
+  __atomic_store_n(syncp, 1, __ATOMIC_RELEASE);
+  __atomic_store_n(&syncp, 0, __ATOMIC_RELAXED);
+  return NULL;
+}
+
+void __attribute__((noinline)) foobar() {
+  long s;
+  addr = &s;
+  __atomic_store_n(&s, 0, __ATOMIC_RELAXED);
+  __atomic_store_n(&syncp, &s, __ATOMIC_RELEASE);
+  while (__atomic_load_n(&syncp, __ATOMIC_RELAXED) != 0)
+    usleep(1000);  // spin wait
+}
+
+void __attribute__((noinline)) barfoo() {
+  long s;
+  if (addr != &s) {
+    printf("address mismatch addr=%p &s=%p\n", addr, &s);
+    exit(1);
+  }
+  __atomic_store_n(&addr, &s, __ATOMIC_RELAXED);
+  __atomic_store_n(&s, 0, __ATOMIC_RELAXED);
+  sink = __atomic_load_n(&s, __ATOMIC_ACQUIRE);
+  global = 43;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  foobar();
+  barfoo();
+  pthread_join(t, 0);
+  if (sink != 0)
+    exit(1);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
diff --git a/test/tsan/suppress_same_address.cc b/test/tsan/suppress_same_address.cc
index df19da1..3ec13ee 100644
--- a/test/tsan/suppress_same_address.cc
+++ b/test/tsan/suppress_same_address.cc
@@ -1,11 +1,10 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 volatile int X;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   X = 42;
   X = 66;
   X = 78;
@@ -16,10 +15,12 @@
   X = 11;
   X = 99;
   X = 73;
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread1, 0);
   Thread2(0);
diff --git a/test/tsan/suppressions_global.cc b/test/tsan/suppressions_global.cc
index c808a63..c7b9bb9 100644
--- a/test/tsan/suppressions_global.cc
+++ b/test/tsan/suppressions_global.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 
diff --git a/test/tsan/suppressions_race.cc b/test/tsan/suppressions_race.cc
index 1d72874..45c3048 100644
--- a/test/tsan/suppressions_race.cc
+++ b/test/tsan/suppressions_race.cc
@@ -1,22 +1,22 @@
-// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %run %t 2>&1 | FileCheck %s
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global = 42;
   return NULL;
 }
 
 void *Thread2(void *x) {
   Global = 43;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/suppressions_race2.cc b/test/tsan/suppressions_race2.cc
index 4ababdd..24ecd8e 100644
--- a/test/tsan/suppressions_race2.cc
+++ b/test/tsan/suppressions_race2.cc
@@ -1,22 +1,22 @@
-// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS suppressions='%s.supp'" %run %t 2>&1 | FileCheck %s
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
   Global = 42;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global = 43;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/test.h b/test/tsan/test.h
new file mode 100644
index 0000000..bb861b0
--- /dev/null
+++ b/test/tsan/test.h
@@ -0,0 +1,40 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <stddef.h>
+
+// TSan-invisible barrier.
+// Tests use it to establish necessary execution order in a way that does not
+// interfere with tsan (does not establish synchronization between threads).
+__typeof(pthread_barrier_wait) *barrier_wait;
+
+void barrier_init(pthread_barrier_t *barrier, unsigned count) {
+  if (barrier_wait == 0) {  // resolve the function pointer only once
+    void *h = dlopen("libpthread.so.0", RTLD_LAZY);  // never dlclose()d here
+    if (h == 0) {
+      fprintf(stderr, "failed to dlopen libpthread.so.0, exiting\n");
+      exit(1);
+    }
+    barrier_wait = (__typeof(barrier_wait))dlsym(h, "pthread_barrier_wait");
+    if (barrier_wait == 0) {
+      fprintf(stderr, "failed to resolve pthread_barrier_wait, exiting\n");
+      exit(1);
+    }
+  }
+  pthread_barrier_init(barrier, 0, count);  // 0: default barrier attributes
+}
+
+// Default instance of the barrier, but a test can declare more manually.
+pthread_barrier_t barrier;
+
+void print_address(void *address) {
+// On FreeBSD, the %p conversion specifier works as 0x%x and thus does not match
+// the format used in the diagnostic message.
+#ifdef __x86_64__
+  fprintf(stderr, "0x%012lx", (unsigned long) address);
+#elif defined(__mips64)
+  fprintf(stderr, "0x%010lx", (unsigned long) address);
+#endif  // NOTE(review): no #else branch; nothing is printed on other targets
+}
diff --git a/test/tsan/thread_detach.c b/test/tsan/thread_detach.c
index 32cf641..802d8de 100644
--- a/test/tsan/thread_detach.c
+++ b/test/tsan/thread_detach.c
@@ -1,16 +1,16 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *x) {
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
   pthread_detach(t);
   printf("PASS\n");
   return 0;
diff --git a/test/tsan/thread_leak3.c b/test/tsan/thread_leak3.c
index f4db484..c09fb71 100644
--- a/test/tsan/thread_leak3.c
+++ b/test/tsan/thread_leak3.c
@@ -1,15 +1,17 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *x) {
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  sleep(1);
+  barrier_wait(&barrier);
+  sleep(1);  // wait for the thread to finish and exit
   return 0;
 }
 
diff --git a/test/tsan/thread_leak4.c b/test/tsan/thread_leak4.c
index 0d3b830..1ebca58 100644
--- a/test/tsan/thread_leak4.c
+++ b/test/tsan/thread_leak4.c
@@ -1,18 +1,18 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
-#include <stdio.h>
+#include "test.h"
 
 void *Thread(void *x) {
-  sleep(10);
+  sleep(100);  // leave the thread "running"
   return 0;
 }
 
 int main() {
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  printf("OK\n");
+  printf("DONE\n");
   return 0;
 }
 
+// CHECK: DONE
 // CHECK-NOT: WARNING: ThreadSanitizer: thread leak
+
diff --git a/test/tsan/thread_leak5.c b/test/tsan/thread_leak5.c
index ca244a9..acdbd1d 100644
--- a/test/tsan/thread_leak5.c
+++ b/test/tsan/thread_leak5.c
@@ -1,18 +1,20 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *x) {
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
   volatile int N = 5;  // prevent loop unrolling
+  barrier_init(&barrier, N + 1);
   for (int i = 0; i < N; i++) {
     pthread_t t;
     pthread_create(&t, 0, Thread, 0);
   }
-  sleep(1);
+  barrier_wait(&barrier);
+  sleep(1);  // wait for the threads to finish and exit
   return 0;
 }
 
diff --git a/test/tsan/thread_name.cc b/test/tsan/thread_name.cc
index a790c66..80d30b8 100644
--- a/test/tsan/thread_name.cc
+++ b/test/tsan/thread_name.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 #if defined(__linux__)
 #define USE_PTHREAD_SETNAME_NP __GLIBC_PREREQ(2, 12)
@@ -18,7 +16,7 @@
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   AnnotateThreadName(__FILE__, __LINE__, "Thread1");
   Global++;
   return NULL;
@@ -31,10 +29,12 @@
   AnnotateThreadName(__FILE__, __LINE__, "Thread2");
 #endif
   Global--;
+  barrier_wait(&barrier);
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/thread_name2.cc b/test/tsan/thread_name2.cc
index 6a3dafe..a44f4b9 100644
--- a/test/tsan/thread_name2.cc
+++ b/test/tsan/thread_name2.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 #if defined(__FreeBSD__)
 #include <pthread_np.h>
@@ -11,7 +9,7 @@
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global++;
   return 0;
 }
@@ -19,14 +17,17 @@
 void *Thread2(void *x) {
   pthread_setname_np(pthread_self(), "foobar2");
   Global--;
+  barrier_wait(&barrier);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 3);
   pthread_t t[2];
   pthread_create(&t[0], 0, Thread1, 0);
   pthread_create(&t[1], 0, Thread2, 0);
   pthread_setname_np(t[0], "foobar1");
+  barrier_wait(&barrier);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
 }
diff --git a/test/tsan/tiny_race.c b/test/tsan/tiny_race.c
index c10eab1..b6937fe 100644
--- a/test/tsan/tiny_race.c
+++ b/test/tsan/tiny_race.c
@@ -1,19 +1,20 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 int Global;
 
 void *Thread1(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   Global = 42;
   return x;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread1, 0);
   Global = 43;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   return Global;
 }
diff --git a/test/tsan/tls_race.cc b/test/tsan/tls_race.cc
index 1858934..5e81722 100644
--- a/test/tsan/tls_race.cc
+++ b/test/tsan/tls_race.cc
@@ -1,19 +1,19 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)a = 43;
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   static __thread int Var = 42;
   pthread_t t;
   pthread_create(&t, 0, Thread, &Var);
   Var = 43;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/tls_race2.cc b/test/tsan/tls_race2.cc
index 0ca629a..d0f7b03 100644
--- a/test/tsan/tls_race2.cc
+++ b/test/tsan/tls_race2.cc
@@ -1,10 +1,8 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stddef.h>
-#include <unistd.h>
+#include "test.h"
 
 void *Thread2(void *a) {
-  sleep(1);
+  barrier_wait(&barrier);
   *(int*)a = 43;
   return 0;
 }
@@ -14,11 +12,13 @@
   pthread_t t;
   pthread_create(&t, 0, Thread2, &Var);
   Var = 42;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   pthread_join(t, 0);
diff --git a/test/tsan/unaligned_norace.cc b/test/tsan/unaligned_norace.cc
index 20cb545..94df1cf 100644
--- a/test/tsan/unaligned_norace.cc
+++ b/test/tsan/unaligned_norace.cc
@@ -7,20 +7,20 @@
 uint64_t objs[8*3*3*2][3];
 
 extern "C" {
-uint16_t __tsan_unaligned_read2(void *addr);
-uint32_t __tsan_unaligned_read4(void *addr);
-uint64_t __tsan_unaligned_read8(void *addr);
-void __tsan_unaligned_write2(void *addr, uint16_t v);
-void __tsan_unaligned_write4(void *addr, uint32_t v);
-void __tsan_unaligned_write8(void *addr, uint64_t v);
+void __tsan_unaligned_read2(void *addr);
+void __tsan_unaligned_read4(void *addr);
+void __tsan_unaligned_read8(void *addr);
+void __tsan_unaligned_write2(void *addr);
+void __tsan_unaligned_write4(void *addr);
+void __tsan_unaligned_write8(void *addr);
 }
 
 static void access(char *p, int sz, int rw) {
   if (rw) {
     switch (sz) {
-    case 0: __tsan_unaligned_write2(p, 0); break;
-    case 1: __tsan_unaligned_write4(p, 0); break;
-    case 2: __tsan_unaligned_write8(p, 0); break;
+    case 0: __tsan_unaligned_write2(p); break;
+    case 1: __tsan_unaligned_write4(p); break;
+    case 2: __tsan_unaligned_write8(p); break;
     default: exit(1);
     }
   } else {
diff --git a/test/tsan/unaligned_race.cc b/test/tsan/unaligned_race.cc
index 6e9b5a3..030642a 100644
--- a/test/tsan/unaligned_race.cc
+++ b/test/tsan/unaligned_race.cc
@@ -1,9 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include "test.h"
 #include <stdint.h>
-#include <unistd.h>
 
 #define NOINLINE __attribute__((noinline))
 
@@ -123,15 +120,17 @@
 
 void *Thread(void *p) {
   (void)p;
-  sleep(1);
+  barrier_wait(&barrier);
   Test(false);
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
   Test(true);
+  barrier_wait(&barrier);
   pthread_join(th, 0);
 }
 
diff --git a/test/tsan/vptr_harmful_race.cc b/test/tsan/vptr_harmful_race.cc
index 68e12e8..d15b396 100644
--- a/test/tsan/vptr_harmful_race.cc
+++ b/test/tsan/vptr_harmful_race.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
+#include "test.h"
 #include <semaphore.h>
-#include <stdio.h>
-#include <unistd.h>
 
 struct A {
   A() {
@@ -31,16 +29,18 @@
 void *Thread1(void *x) {
   obj->F();
   obj->Done();
+  barrier_wait(&barrier);
   return NULL;
 }
 
 void *Thread2(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   delete obj;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/vptr_harmful_race2.cc b/test/tsan/vptr_harmful_race2.cc
index aa53bbb..a56b74c 100644
--- a/test/tsan/vptr_harmful_race2.cc
+++ b/test/tsan/vptr_harmful_race2.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
+#include "test.h"
 #include <semaphore.h>
-#include <stdio.h>
-#include <unistd.h>
 
 struct A {
   A() {
@@ -29,18 +27,20 @@
 static A *obj = new B;
 
 void *Thread1(void *x) {
-  sleep(1);
   obj->F();
+  barrier_wait(&barrier);
   obj->Done();
   return NULL;
 }
 
 void *Thread2(void *x) {
+  barrier_wait(&barrier);
   delete obj;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/vptr_harmful_race3.cc b/test/tsan/vptr_harmful_race3.cc
index ac6ea94..3810a10 100644
--- a/test/tsan/vptr_harmful_race3.cc
+++ b/test/tsan/vptr_harmful_race3.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
+#include "test.h"
 #include <semaphore.h>
-#include <stdio.h>
-#include <unistd.h>
 
 struct A {
   A() {
@@ -30,18 +28,20 @@
 static void (A::*fn)() = &A::F;
 
 void *Thread1(void *x) {
-  sleep(1);
   (obj->*fn)();
+  barrier_wait(&barrier);
   obj->Done();
   return NULL;
 }
 
 void *Thread2(void *x) {
+  barrier_wait(&barrier);
   delete obj;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
   pthread_create(&t[1], NULL, Thread2, NULL);
diff --git a/test/tsan/vptr_harmful_race4.cc b/test/tsan/vptr_harmful_race4.cc
index 969c9d5..543514d 100644
--- a/test/tsan/vptr_harmful_race4.cc
+++ b/test/tsan/vptr_harmful_race4.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "test.h"
 
 struct A {
   virtual void F() {
@@ -17,16 +15,18 @@
 };
 
 void *Thread(void *x) {
-  sleep(1);
+  barrier_wait(&barrier);
   ((A*)x)->F();
   return 0;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   A *obj = new B;
   pthread_t t;
   pthread_create(&t, 0, Thread, obj);
   delete obj;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
 }
 
diff --git a/test/tsan/write_in_reader_lock.cc b/test/tsan/write_in_reader_lock.cc
index 5588213..3f7cb35 100644
--- a/test/tsan/write_in_reader_lock.cc
+++ b/test/tsan/write_in_reader_lock.cc
@@ -1,6 +1,5 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
+#include "test.h"
 
 pthread_rwlock_t rwlock;
 int GLOB;
@@ -8,14 +7,15 @@
 void *Thread1(void *p) {
   (void)p;
   pthread_rwlock_rdlock(&rwlock);
+  barrier_wait(&barrier);
   // Write under reader lock.
-  sleep(1);
   GLOB++;
   pthread_rwlock_unlock(&rwlock);
   return 0;
 }
 
 int main(int argc, char *argv[]) {
+  barrier_init(&barrier, 2);
   pthread_rwlock_init(&rwlock, NULL);
   pthread_rwlock_rdlock(&rwlock);
   pthread_t t;
@@ -23,6 +23,7 @@
   volatile int x = GLOB;
   (void)x;
   pthread_rwlock_unlock(&rwlock);
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   pthread_rwlock_destroy(&rwlock);
   return 0;
@@ -30,6 +31,6 @@
 
 // CHECK: WARNING: ThreadSanitizer: data race
 // CHECK:   Write of size 4 at {{.*}} by thread T1{{.*}}:
-// CHECK:     #0 Thread1(void*) {{.*}}write_in_reader_lock.cc:13
+// CHECK:     #0 Thread1(void*) {{.*}}write_in_reader_lock.cc:12
 // CHECK:   Previous read of size 4 at {{.*}} by main thread{{.*}}:
 // CHECK:     #0 main {{.*}}write_in_reader_lock.cc:23