diff --git a/Android.mk b/Android.mk
index e69de29..bf9670d 100644
--- a/Android.mk
+++ b/Android.mk
@@ -0,0 +1,24 @@
+# Copyright (C) 2013 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+
+# Top-level entry point: include the Android.mk of each listed subdirectory
+# (currently only "main").
+# unittest can be built with "mmm external/valgrind/unittest"
+subdirs := $(foreach d,main,$(LOCAL_PATH)/$(d)/Android.mk)
+
+include $(subdirs)
+
+
diff --git a/dynamic_annotations/Android.mk b/dynamic_annotations/Android.mk
deleted file mode 100644
index 44edfcb..0000000
--- a/dynamic_annotations/Android.mk
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (C) 2011 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-LOCAL_PATH:= $(call my-dir)
-include $(CLEAR_VARS)
-
-local_cflags := -DDYNAMIC_ANNOTATIONS_ENABLED=1
-local_src_files := dynamic_annotations.c
-
-LOCAL_MODULE := libdynamic_annotations
-LOCAL_MODULE_TAGS := optional
-LOCAL_ARM_MODE := arm
-LOCAL_CFLAGS += $(local_cflags)
-LOCAL_SRC_FILES := $(local_src_files)
-LOCAL_PRELINK_MODULE := false
-
-# Remove this when the all toolchains are GCC 4.4
-ifeq ($(TARGET_ARCH),arm)
-  LOCAL_LDFLAGS += -Wl,--icf=none
-endif
-
-include $(BUILD_SHARED_LIBRARY)
-
-
-include $(CLEAR_VARS)
-LOCAL_MODULE := libdynamic_annotations-host
-LOCAL_MODULE_TAGS := optional
-LOCAL_CFLAGS += $(local_cflags)
-LOCAL_SRC_FILES := $(local_src_files)
-include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/dynamic_annotations/MODULE_LICENSE_BSD_LIKE b/dynamic_annotations/MODULE_LICENSE_BSD_LIKE
deleted file mode 100644
index e69de29..0000000
--- a/dynamic_annotations/MODULE_LICENSE_BSD_LIKE
+++ /dev/null
diff --git a/dynamic_annotations/dynamic_annotations.c b/dynamic_annotations/dynamic_annotations.c
deleted file mode 100644
index 607eac5..0000000
--- a/dynamic_annotations/dynamic_annotations.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/* Copyright (c) 2011, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef _MSC_VER
-# include <windows.h>
-#endif
-
-#ifdef __cplusplus
-# error "This file should be built as pure C to avoid name mangling"
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "dynamic_annotations.h"
-
-#ifdef __GNUC__
-/* valgrind.h uses gcc extensions so it won't build with other compilers */
-# include "third_party/valgrind/valgrind.h"
-#endif
-
-/* Each function is empty and called (via a macro) only in debug mode.
-   The arguments are captured by dynamic tools at runtime. */
-
-#if DYNAMIC_ANNOTATIONS_ENABLED == 1
-
-/* Identical code folding(-Wl,--icf=all) countermeasures.
-   This makes all Annotate* functions different, which prevents the linker from folding them. */
-#ifdef __COUNTER__
-#define DYNAMIC_ANNOTATIONS_IMPL volatile short lineno = (__LINE__ << 8) + __COUNTER__; (void)lineno;
-#else
-#define DYNAMIC_ANNOTATIONS_IMPL volatile short lineno = (__LINE__ << 8); (void)lineno;
-#endif
-
-/* WARNING: always add new annotations to the end of the list.
-   Otherwise, lineno (see above) numbers for different Annotate* functions may conflict. */
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockCreate)(
-    const char *file, int line, const volatile void *lock) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockDestroy)(
-    const char *file, int line, const volatile void *lock) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockAcquired)(
-    const char *file, int line, const volatile void *lock, long is_w) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockReleased)(
-    const char *file, int line, const volatile void *lock, long is_w) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierInit)(
-    const char *file, int line, const volatile void *barrier, long count,
-    long reinitialization_allowed)  {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierWaitBefore)(
-    const char *file, int line, const volatile void *barrier)  {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierWaitAfter)(
-    const char *file, int line, const volatile void *barrier)  {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierDestroy)(
-    const char *file, int line, const volatile void *barrier)  {DYNAMIC_ANNOTATIONS_IMPL}
-
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarWait)(
-    const char *file, int line, const volatile void *cv,
-    const volatile void *lock) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarSignal)(
-    const char *file, int line, const volatile void *cv) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarSignalAll)(
-    const char *file, int line, const volatile void *cv) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateHappensBefore)(
-    const char *file, int line, const volatile void *obj)  {DYNAMIC_ANNOTATIONS_IMPL};
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateHappensAfter)(
-    const char *file, int line, const volatile void *obj)  {DYNAMIC_ANNOTATIONS_IMPL};
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePublishMemoryRange)(
-    const char *file, int line, const volatile void *address, long size) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateUnpublishMemoryRange)(
-    const char *file, int line, const volatile void *address, long size) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQCreate)(
-    const char *file, int line, const volatile void *pcq) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQDestroy)(
-    const char *file, int line, const volatile void *pcq) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQPut)(
-    const char *file, int line, const volatile void *pcq) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQGet)(
-    const char *file, int line, const volatile void *pcq) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateNewMemory)(
-    const char *file, int line, const volatile void *mem, long size) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateExpectRace)(
-    const char *file, int line, const volatile void *mem,
-    const char *description) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateFlushExpectedRaces)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBenignRace)(
-    const char *file, int line, const volatile void *mem,
-    const char *description) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBenignRaceSized)(
-    const char *file, int line, const volatile void *mem, long size,
-    const char *description) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsUsedAsCondVar)(
-    const char *file, int line, const volatile void *mu) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsNotPHB)(
-    const char *file, int line, const volatile void *mu) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateTraceMemory)(
-    const char *file, int line, const volatile void *arg) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateThreadName)(
-    const char *file, int line, const char *name) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreReadsBegin)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreReadsEnd)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreWritesBegin)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreWritesEnd)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreSyncBegin)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreSyncEnd)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateEnableRaceDetection)(
-    const char *file, int line, int enable) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateNoOp)(
-    const char *file, int line, const volatile void *arg) {DYNAMIC_ANNOTATIONS_IMPL}
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateFlushState)(
-    const char *file, int line) {DYNAMIC_ANNOTATIONS_IMPL}
-
-#endif  /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */
-
-#if DYNAMIC_ANNOTATIONS_PROVIDE_RUNNING_ON_VALGRIND == 1
-static int GetRunningOnValgrind(void) {
-#ifdef RUNNING_ON_VALGRIND
-  if (RUNNING_ON_VALGRIND) return 1;
-#endif
-
-#ifndef _MSC_VER
-  char *running_on_valgrind_str = getenv("RUNNING_ON_VALGRIND");
-  if (running_on_valgrind_str) {
-    return strcmp(running_on_valgrind_str, "0") != 0;
-  }
-#else
-  /* Visual Studio issues warnings if we use getenv,
-   * so we use GetEnvironmentVariableA instead.
-   */
-  char value[100] = "1";
-  int res = GetEnvironmentVariableA("RUNNING_ON_VALGRIND",
-                                    value, sizeof(value));
-  /* value will remain "1" if res == 0 or res >= sizeof(value). The latter
-   * can happen only if the given value is long, in this case it can't be "0".
-   */
-  if (res > 0 && strcmp(value, "0") != 0)
-    return 1;
-#endif
-  return 0;
-}
-
-/* See the comments in dynamic_annotations.h */
-int RunningOnValgrind(void) {
-  static volatile int running_on_valgrind = -1;
-  /* C doesn't have thread-safe initialization of statics, and we
-     don't want to depend on pthread_once here, so hack it. */
-  int local_running_on_valgrind = running_on_valgrind;
-  if (local_running_on_valgrind == -1)
-    running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
-  return local_running_on_valgrind;
-}
-
-#endif /* DYNAMIC_ANNOTATIONS_PROVIDE_RUNNING_ON_VALGRIND == 1 */
diff --git a/dynamic_annotations/dynamic_annotations.h b/dynamic_annotations/dynamic_annotations.h
deleted file mode 100644
index f10e27f..0000000
--- a/dynamic_annotations/dynamic_annotations.h
+++ /dev/null
@@ -1,595 +0,0 @@
-/* Copyright (c) 2011, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* This file defines dynamic annotations for use with dynamic analysis
-   tool such as valgrind, PIN, etc.
-
-   Dynamic annotation is a source code annotation that affects
-   the generated code (that is, the annotation is not a comment).
-   Each such annotation is attached to a particular
-   instruction and/or to a particular object (address) in the program.
-
-   The annotations that should be used by users are macros in all upper-case
-   (e.g., ANNOTATE_NEW_MEMORY).
-
-   Actual implementation of these macros may differ depending on the
-   dynamic analysis tool being used.
-
-   See http://code.google.com/p/data-race-test/  for more information.
-
-   This file supports the following dynamic analysis tools:
-   - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero).
-      Macros are defined empty.
-   - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1).
-      Macros are defined as calls to non-inlinable empty functions
-      that are intercepted by Valgrind. */
-
-#ifndef __DYNAMIC_ANNOTATIONS_H__
-#define __DYNAMIC_ANNOTATIONS_H__
-
-#ifndef DYNAMIC_ANNOTATIONS_PREFIX
-# define DYNAMIC_ANNOTATIONS_PREFIX
-#endif
-
-#ifndef DYNAMIC_ANNOTATIONS_PROVIDE_RUNNING_ON_VALGRIND
-# define DYNAMIC_ANNOTATIONS_PROVIDE_RUNNING_ON_VALGRIND 1
-#endif
-
-#ifdef DYNAMIC_ANNOTATIONS_WANT_ATTRIBUTE_WEAK
-# ifdef __GNUC__
-#  define DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK __attribute__((weak))
-# else
-/* TODO(glider): for Windows support we may want to change this macro in order
-   to prepend __declspec(selectany) to the annotations' declarations. */
-#  error weak annotations are not supported for your compiler
-# endif
-#else
-# define DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK
-#endif
-
-/* The following preprocessor magic prepends the value of
-   DYNAMIC_ANNOTATIONS_PREFIX to annotation function names. */
-#define DYNAMIC_ANNOTATIONS_GLUE0(A, B) A##B
-#define DYNAMIC_ANNOTATIONS_GLUE(A, B) DYNAMIC_ANNOTATIONS_GLUE0(A, B)
-#define DYNAMIC_ANNOTATIONS_NAME(name) \
-  DYNAMIC_ANNOTATIONS_GLUE(DYNAMIC_ANNOTATIONS_PREFIX, name)
-
-#ifndef DYNAMIC_ANNOTATIONS_ENABLED
-# define DYNAMIC_ANNOTATIONS_ENABLED 0
-#endif
-
-#if DYNAMIC_ANNOTATIONS_ENABLED != 0
-
-  /* -------------------------------------------------------------
-     Annotations useful when implementing condition variables such as CondVar,
-     using conditional critical sections (Await/LockWhen) and when constructing
-     user-defined synchronization mechanisms.
-
-     The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can
-     be used to define happens-before arcs in user-defined synchronization
-     mechanisms:  the race detector will infer an arc from the former to the
-     latter when they share the same argument pointer.
-
-     Example 1 (reference counting):
-
-     void Unref() {
-       ANNOTATE_HAPPENS_BEFORE(&refcount_);
-       if (AtomicDecrementByOne(&refcount_) == 0) {
-         ANNOTATE_HAPPENS_AFTER(&refcount_);
-         delete this;
-       }
-     }
-
-     Example 2 (message queue):
-
-     void MyQueue::Put(Type *e) {
-       MutexLock lock(&mu_);
-       ANNOTATE_HAPPENS_BEFORE(e);
-       PutElementIntoMyQueue(e);
-     }
-
-     Type *MyQueue::Get() {
-       MutexLock lock(&mu_);
-       Type *e = GetElementFromMyQueue();
-       ANNOTATE_HAPPENS_AFTER(e);
-       return e;
-     }
-
-     Note: when possible, please use the existing reference counting and message
-     queue implementations instead of inventing new ones. */
-
-  /* Report that wait on the condition variable at address "cv" has succeeded
-     and the lock at address "lock" is held. */
-  #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarWait)(__FILE__, __LINE__, cv, lock)
-
-  /* Report that wait on the condition variable at "cv" has succeeded.  Variant
-     w/o lock. */
-  #define ANNOTATE_CONDVAR_WAIT(cv) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarWait)(__FILE__, __LINE__, cv, NULL)
-
-  /* Report that we are about to signal on the condition variable at address
-     "cv". */
-  #define ANNOTATE_CONDVAR_SIGNAL(cv) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarSignal)(__FILE__, __LINE__, cv)
-
-  /* Report that we are about to signal_all on the condition variable at address
-     "cv". */
-  #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarSignalAll)(__FILE__, __LINE__, cv)
-
-  /* Annotations for user-defined synchronization mechanisms. */
-  #define ANNOTATE_HAPPENS_BEFORE(obj) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateHappensBefore)(__FILE__, __LINE__, obj)
-  #define ANNOTATE_HAPPENS_AFTER(obj) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateHappensAfter)(__FILE__, __LINE__, obj)
-
-  /* DEPRECATED. Don't use it. */
-  #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotatePublishMemoryRange)(__FILE__, __LINE__, \
-        pointer, size)
-
-  /* DEPRECATED. Don't use it. */
-  #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateUnpublishMemoryRange)(__FILE__, __LINE__, \
-        pointer, size)
-
-  /* DEPRECATED. Don't use it. */
-  #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size)   \
-    do {                                              \
-      ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \
-      ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size);   \
-    } while (0)
-
-  /* Instruct the tool to create a happens-before arc between mu->Unlock() and
-     mu->Lock(). This annotation may slow down the race detector and hide real
-     races. Normally it is used only when it would be difficult to annotate each
-     of the mutex's critical sections individually using the annotations above.
-     This annotation makes sense only for hybrid race detectors. For pure
-     happens-before detectors this is a no-op. For more details see
-     http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */
-  #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsUsedAsCondVar)(__FILE__, __LINE__, \
-        mu)
-
-  /* Opposite to ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX.
-     Instruct the tool to NOT create h-b arcs between Unlock and Lock, even in
-     pure happens-before mode. For a hybrid mode this is a no-op. */
-  #define ANNOTATE_NOT_HAPPENS_BEFORE_MUTEX(mu) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsNotPHB)(__FILE__, __LINE__, mu)
-
-  /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */
-  #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsUsedAsCondVar)(__FILE__, __LINE__, \
-        mu)
-
-  /* -------------------------------------------------------------
-     Annotations useful when defining memory allocators, or when memory that
-     was protected in one way starts to be protected in another. */
-
-  /* Report that a new memory at "address" of size "size" has been allocated.
-     This might be used when the memory has been retrieved from a free list and
-     is about to be reused, or when a the locking discipline for a variable
-     changes. */
-  #define ANNOTATE_NEW_MEMORY(address, size) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateNewMemory)(__FILE__, __LINE__, address, \
-        size)
-
-  /* -------------------------------------------------------------
-     Annotations useful when defining FIFO queues that transfer data between
-     threads. */
-
-  /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at
-     address "pcq" has been created.  The ANNOTATE_PCQ_* annotations
-     should be used only for FIFO queues.  For non-FIFO queues use
-     ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */
-  #define ANNOTATE_PCQ_CREATE(pcq) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQCreate)(__FILE__, __LINE__, pcq)
-
-  /* Report that the queue at address "pcq" is about to be destroyed. */
-  #define ANNOTATE_PCQ_DESTROY(pcq) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQDestroy)(__FILE__, __LINE__, pcq)
-
-  /* Report that we are about to put an element into a FIFO queue at address
-     "pcq". */
-  #define ANNOTATE_PCQ_PUT(pcq) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQPut)(__FILE__, __LINE__, pcq)
-
-  /* Report that we've just got an element from a FIFO queue at address
-     "pcq". */
-  #define ANNOTATE_PCQ_GET(pcq) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQGet)(__FILE__, __LINE__, pcq)
-
-  /* -------------------------------------------------------------
-     Annotations that suppress errors.  It is usually better to express the
-     program's synchronization using the other annotations, but these can
-     be used when all else fails. */
-
-  /* Report that we may have a benign race at "pointer", with size
-     "sizeof(*(pointer))". "pointer" must be a non-void* pointer.  Insert at the
-     point where "pointer" has been allocated, preferably close to the point
-     where the race happens.  See also ANNOTATE_BENIGN_RACE_STATIC. */
-  #define ANNOTATE_BENIGN_RACE(pointer, description) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateBenignRaceSized)(__FILE__, __LINE__, \
-        pointer, sizeof(*(pointer)), description)
-
-  /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to
-     the memory range [address, address+size). */
-  #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateBenignRaceSized)(__FILE__, __LINE__, \
-        address, size, description)
-
-  /* Request the analysis tool to ignore all reads in the current thread
-     until ANNOTATE_IGNORE_READS_END is called.
-     Useful to ignore intentional racey reads, while still checking
-     other reads and all writes.
-     See also ANNOTATE_UNPROTECTED_READ. */
-  #define ANNOTATE_IGNORE_READS_BEGIN() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreReadsBegin)(__FILE__, __LINE__)
-
-  /* Stop ignoring reads. */
-  #define ANNOTATE_IGNORE_READS_END() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreReadsEnd)(__FILE__, __LINE__)
-
-  /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */
-  #define ANNOTATE_IGNORE_WRITES_BEGIN() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreWritesBegin)(__FILE__, __LINE__)
-
-  /* Stop ignoring writes. */
-  #define ANNOTATE_IGNORE_WRITES_END() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreWritesEnd)(__FILE__, __LINE__)
-
-  /* Start ignoring all memory accesses (reads and writes). */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \
-    do {\
-      ANNOTATE_IGNORE_READS_BEGIN();\
-      ANNOTATE_IGNORE_WRITES_BEGIN();\
-    }while(0)\
-
-  /* Stop ignoring all memory accesses. */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \
-    do {\
-      ANNOTATE_IGNORE_WRITES_END();\
-      ANNOTATE_IGNORE_READS_END();\
-    }while(0)\
-
-  /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore synchronization events:
-     RWLOCK* and CONDVAR*. */
-  #define ANNOTATE_IGNORE_SYNC_BEGIN() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreSyncBegin)(__FILE__, __LINE__)
-
-  /* Stop ignoring sync events. */
-  #define ANNOTATE_IGNORE_SYNC_END() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreSyncEnd)(__FILE__, __LINE__)
-
-
-  /* Enable (enable!=0) or disable (enable==0) race detection for all threads.
-     This annotation could be useful if you want to skip expensive race analysis
-     during some period of program execution, e.g. during initialization. */
-  #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateEnableRaceDetection)(__FILE__, __LINE__, \
-        enable)
-
-  /* -------------------------------------------------------------
-     Annotations useful for debugging. */
-
-  /* Request to trace every access to "address". */
-  #define ANNOTATE_TRACE_MEMORY(address) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateTraceMemory)(__FILE__, __LINE__, address)
-
-  /* Report the current thread name to a race detector. */
-  #define ANNOTATE_THREAD_NAME(name) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateThreadName)(__FILE__, __LINE__, name)
-
-  /* -------------------------------------------------------------
-     Annotations useful when implementing locks.  They are not
-     normally needed by modules that merely use locks.
-     The "lock" argument is a pointer to the lock object. */
-
-  /* Report that a lock has been created at address "lock". */
-  #define ANNOTATE_RWLOCK_CREATE(lock) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockCreate)(__FILE__, __LINE__, lock)
-
-  /* Report that the lock at address "lock" is about to be destroyed. */
-  #define ANNOTATE_RWLOCK_DESTROY(lock) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockDestroy)(__FILE__, __LINE__, lock)
-
-  /* Report that the lock at address "lock" has been acquired.
-     is_w=1 for writer lock, is_w=0 for reader lock. */
-  #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockAcquired)(__FILE__, __LINE__, lock, \
-        is_w)
-
-  /* Report that the lock at address "lock" is about to be released. */
-  #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockReleased)(__FILE__, __LINE__, lock, \
-        is_w)
-
-  /* -------------------------------------------------------------
-     Annotations useful when implementing barriers.  They are not
-     normally needed by modules that merely use barriers.
-     The "barrier" argument is a pointer to the barrier object. */
-
-  /* Report that the "barrier" has been initialized with initial "count".
-   If 'reinitialization_allowed' is true, initialization is allowed to happen
-   multiple times w/o calling barrier_destroy() */
-  #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierInit)(__FILE__, __LINE__, barrier, \
-        count, reinitialization_allowed)
-
-  /* Report that we are about to enter barrier_wait("barrier"). */
-  #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierWaitBefore)(__FILE__, __LINE__, \
-        barrier)
-
-  /* Report that we just exited barrier_wait("barrier"). */
-  #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierWaitAfter)(__FILE__, __LINE__, \
-        barrier)
-
-  /* Report that the "barrier" has been destroyed. */
-  #define ANNOTATE_BARRIER_DESTROY(barrier) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierDestroy)(__FILE__, __LINE__, \
-        barrier)
-
-  /* -------------------------------------------------------------
-     Annotations useful for testing race detectors. */
-
-  /* Report that we expect a race on the variable at "address".
-     Use only in unit tests for a race detector. */
-  #define ANNOTATE_EXPECT_RACE(address, description) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateExpectRace)(__FILE__, __LINE__, address, \
-        description)
-
-  #define ANNOTATE_FLUSH_EXPECTED_RACES() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateFlushExpectedRaces)(__FILE__, __LINE__)
-
-  /* A no-op. Insert where you like to test the interceptors. */
-  #define ANNOTATE_NO_OP(arg) \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateNoOp)(__FILE__, __LINE__, arg)
-
-  /* Force the race detector to flush its state. The actual effect depends on
-   * the implementation of the detector. */
-  #define ANNOTATE_FLUSH_STATE() \
-    DYNAMIC_ANNOTATIONS_NAME(AnnotateFlushState)(__FILE__, __LINE__)
-
-
-#else  /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
-
-  #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */
-  #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */
-  #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */
-  #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */
-  #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */
-  #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */
-  #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */
-  #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */
-  #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */
-  #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */
-  #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */
-  #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */
-  #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */
-  #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */
-  #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */
-  #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size)  /* empty */
-  #define ANNOTATE_SWAP_MEMORY_RANGE(address, size)  /* empty */
-  #define ANNOTATE_PCQ_CREATE(pcq) /* empty */
-  #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */
-  #define ANNOTATE_PCQ_PUT(pcq) /* empty */
-  #define ANNOTATE_PCQ_GET(pcq) /* empty */
-  #define ANNOTATE_NEW_MEMORY(address, size) /* empty */
-  #define ANNOTATE_EXPECT_RACE(address, description) /* empty */
-  #define ANNOTATE_FLUSH_EXPECTED_RACES(address, description) /* empty */
-  #define ANNOTATE_BENIGN_RACE(address, description) /* empty */
-  #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */
-  #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */
-  #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */
-  #define ANNOTATE_TRACE_MEMORY(arg) /* empty */
-  #define ANNOTATE_THREAD_NAME(name) /* empty */
-  #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_READS_END() /* empty */
-  #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_WRITES_END() /* empty */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */
-  #define ANNOTATE_IGNORE_SYNC_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_SYNC_END() /* empty */
-  #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */
-  #define ANNOTATE_NO_OP(arg) /* empty */
-  #define ANNOTATE_FLUSH_STATE() /* empty */
-
-#endif  /* DYNAMIC_ANNOTATIONS_ENABLED */
-
-/* Use the macros above rather than using these functions directly. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockCreate)(
-    const char *file, int line,
-    const volatile void *lock) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockDestroy)(
-    const char *file, int line,
-    const volatile void *lock) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockAcquired)(
-    const char *file, int line,
-    const volatile void *lock, long is_w) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateRWLockReleased)(
-    const char *file, int line,
-    const volatile void *lock, long is_w) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierInit)(
-    const char *file, int line, const volatile void *barrier, long count,
-    long reinitialization_allowed) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierWaitBefore)(
-    const char *file, int line,
-    const volatile void *barrier) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierWaitAfter)(
-    const char *file, int line,
-    const volatile void *barrier) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBarrierDestroy)(
-    const char *file, int line,
-    const volatile void *barrier) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarWait)(
-    const char *file, int line, const volatile void *cv,
-    const volatile void *lock) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarSignal)(
-    const char *file, int line,
-    const volatile void *cv) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateCondVarSignalAll)(
-    const char *file, int line,
-    const volatile void *cv) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateHappensBefore)(
-    const char *file, int line,
-    const volatile void *obj) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateHappensAfter)(
-    const char *file, int line,
-    const volatile void *obj) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePublishMemoryRange)(
-    const char *file, int line,
-    const volatile void *address, long size) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateUnpublishMemoryRange)(
-    const char *file, int line,
-    const volatile void *address, long size) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQCreate)(
-    const char *file, int line,
-    const volatile void *pcq) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQDestroy)(
-    const char *file, int line,
-    const volatile void *pcq) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQPut)(
-    const char *file, int line,
-    const volatile void *pcq) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotatePCQGet)(
-    const char *file, int line,
-    const volatile void *pcq) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateNewMemory)(
-    const char *file, int line,
-    const volatile void *mem, long size) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateExpectRace)(
-    const char *file, int line, const volatile void *mem,
-    const char *description) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateFlushExpectedRaces)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBenignRace)(
-    const char *file, int line, const volatile void *mem,
-    const char *description) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateBenignRaceSized)(
-    const char *file, int line, const volatile void *mem, long size,
-    const char *description) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsUsedAsCondVar)(
-    const char *file, int line,
-    const volatile void *mu) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateMutexIsNotPHB)(
-    const char *file, int line,
-    const volatile void *mu) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateTraceMemory)(
-    const char *file, int line,
-    const volatile void *arg) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateThreadName)(
-    const char *file, int line,
-    const char *name) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreReadsBegin)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreReadsEnd)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreWritesBegin)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreWritesEnd)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreSyncBegin)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateIgnoreSyncEnd)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateEnableRaceDetection)(
-    const char *file, int line, int enable) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateNoOp)(
-    const char *file, int line,
-    const volatile void *arg) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-void DYNAMIC_ANNOTATIONS_NAME(AnnotateFlushState)(
-    const char *file, int line) DYNAMIC_ANNOTATIONS_ATTRIBUTE_WEAK;
-
-#if DYNAMIC_ANNOTATIONS_PROVIDE_RUNNING_ON_VALGRIND == 1
-/* Return non-zero value if running under valgrind.
-
-  If "valgrind.h" is included into dynamic_annotations.c,
-  the regular valgrind mechanism will be used.
-  See http://valgrind.org/docs/manual/manual-core-adv.html about
-  RUNNING_ON_VALGRIND and other valgrind "client requests".
-  The file "valgrind.h" may be obtained by doing
-     svn co svn://svn.valgrind.org/valgrind/trunk/include
-
-  If for some reason you can't use "valgrind.h" or want to fake valgrind,
-  there are two ways to make this function return non-zero:
-    - Use environment variable: export RUNNING_ON_VALGRIND=1
-    - Make your tool intercept the function RunningOnValgrind() and
-      change its return value.
- */
-int RunningOnValgrind(void);
-#endif /* DYNAMIC_ANNOTATIONS_PROVIDE_RUNNING_ON_VALGRIND == 1 */
-
-#ifdef __cplusplus
-}
-#endif
-
-#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus)
-
-  /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads.
-
-     Instead of doing
-        ANNOTATE_IGNORE_READS_BEGIN();
-        ... = x;
-        ANNOTATE_IGNORE_READS_END();
-     one can use
-        ... = ANNOTATE_UNPROTECTED_READ(x); */
-  template <class T>
-  inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
-    ANNOTATE_IGNORE_READS_BEGIN();
-    T res = x;
-    ANNOTATE_IGNORE_READS_END();
-    return res;
-  }
-  /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */
-  #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description)        \
-    namespace {                                                       \
-      class static_var ## _annotator {                                \
-       public:                                                        \
-        static_var ## _annotator() {                                  \
-          ANNOTATE_BENIGN_RACE_SIZED(&static_var,                     \
-                                      sizeof(static_var),             \
-            # static_var ": " description);                           \
-        }                                                             \
-      };                                                              \
-      static static_var ## _annotator the ## static_var ## _annotator;\
-    }
-#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
-
-  #define ANNOTATE_UNPROTECTED_READ(x) (x)
-  #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description)  /* empty */
-
-#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
-
-#endif  /* __DYNAMIC_ANNOTATIONS_H__ */
diff --git a/dynamic_annotations/third_party/valgrind/valgrind.h b/dynamic_annotations/third_party/valgrind/valgrind.h
deleted file mode 100644
index 65993fb..0000000
--- a/dynamic_annotations/third_party/valgrind/valgrind.h
+++ /dev/null
@@ -1,4528 +0,0 @@
-/* -*- c -*-
-   ----------------------------------------------------------------
-
-   Notice that the following BSD-style license applies to this one
-   file (valgrind.h) only.  The rest of Valgrind is licensed under the
-   terms of the GNU General Public License, version 2, unless
-   otherwise indicated.  See the COPYING file in the source
-   distribution for details.
-
-   ----------------------------------------------------------------
-
-   This file is part of Valgrind, a dynamic binary instrumentation
-   framework.
-
-   Copyright (C) 2000-2009 Julian Seward.  All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   1. Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-
-   2. The origin of this software must not be misrepresented; you must 
-      not claim that you wrote the original software.  If you use this 
-      software in a product, an acknowledgment in the product 
-      documentation would be appreciated but is not required.
-
-   3. Altered source versions must be plainly marked as such, and must
-      not be misrepresented as being the original software.
-
-   4. The name of the author may not be used to endorse or promote 
-      products derived from this software without specific prior written 
-      permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
-   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   ----------------------------------------------------------------
-
-   Notice that the above BSD-style license applies to this one file
-   (valgrind.h) only.  The entire rest of Valgrind is licensed under
-   the terms of the GNU General Public License, version 2.  See the
-   COPYING file in the source distribution for details.
-
-   ---------------------------------------------------------------- 
-*/
-
-
-/* This file is for inclusion into client (your!) code.
-
-   You can use these macros to manipulate and query Valgrind's 
-   execution inside your own programs.
-
-   The resulting executables will still run without Valgrind, just a
-   little bit more slowly than they otherwise would, but otherwise
-   unchanged.  When not running on valgrind, each client request
-   consumes very few (eg. 7) instructions, so the resulting performance
-   loss is negligible unless you plan to execute client requests
-   millions of times per second.  Nevertheless, if that is still a
-   problem, you can compile with the NVALGRIND symbol defined (gcc
-   -DNVALGRIND) so that client requests are not even compiled in.  */
-
-#ifndef __VALGRIND_H
-#define __VALGRIND_H
-
-
-/* ------------------------------------------------------------------ */
-/* VERSION NUMBER OF VALGRIND                                         */
-/* ------------------------------------------------------------------ */
-
-/* Specify Valgrind's version number, so that user code can
-   conditionally compile based on our version number. */
-#define __VALGRIND__          3
-#define __VALGRIND_MINOR__    6
-
-
-#include <stdarg.h>
-
-/* Nb: this file might be included in a file compiled with -ansi.  So
-   we can't use C++ style "//" comments nor the "asm" keyword (instead
-   use "__asm__"). */
-
-/* Derive some tags indicating what the target platform is.  Note
-   that in this file we're using the compiler's CPP symbols for
-   identifying architectures, which are different to the ones we use
-   within the rest of Valgrind.  Note, __powerpc__ is active for both
-   32 and 64-bit PPC, whereas __powerpc64__ is only active for the
-   latter (on Linux, that is).
-
-   Misc note: how to find out what's predefined in gcc by default:
-   gcc -Wp,-dM somefile.c
-*/
-#undef PLAT_ppc64_aix5
-#undef PLAT_ppc32_aix5
-#undef PLAT_x86_darwin
-#undef PLAT_amd64_darwin
-#undef PLAT_x86_linux
-#undef PLAT_amd64_linux
-#undef PLAT_ppc32_linux
-#undef PLAT_ppc64_linux
-#undef PLAT_arm_linux
-
-#if defined(_AIX) && defined(__64BIT__)
-#  define PLAT_ppc64_aix5 1
-#elif defined(_AIX) && !defined(__64BIT__)
-#  define PLAT_ppc32_aix5 1
-#elif defined(__APPLE__) && defined(__i386__)
-#  define PLAT_x86_darwin 1
-#elif defined(__APPLE__) && defined(__x86_64__)
-#  define PLAT_amd64_darwin 1
-#elif defined(__linux__) && defined(__i386__)
-#  define PLAT_x86_linux 1
-#elif defined(__linux__) && defined(__x86_64__)
-#  define PLAT_amd64_linux 1
-#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__)
-#  define PLAT_ppc32_linux 1
-#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__)
-#  define PLAT_ppc64_linux 1
-#elif defined(__linux__) && defined(__arm__)
-#  define PLAT_arm_linux 1
-#else
-/* If we're not compiling for our target platform, don't generate
-   any inline asms.  */
-#  if !defined(NVALGRIND)
-#    define NVALGRIND 1
-#  endif
-#endif
-
-
-/* ------------------------------------------------------------------ */
-/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS.  There is nothing */
-/* in here of use to end-users -- skip to the next section.           */
-/* ------------------------------------------------------------------ */
-
-#if defined(NVALGRIND)
-
-/* Define NVALGRIND to completely remove the Valgrind magic sequence
-   from the compiled code (analogous to NDEBUG's effects on
-   assert()) */
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-   {                                                              \
-      (_zzq_rlval) = (_zzq_default);                              \
-   }
-
-#else  /* ! NVALGRIND */
-
-/* The following defines the magic code sequences which the JITter
-   spots and handles magically.  Don't look too closely at them as
-   they will rot your brain.
-
-   The assembly code sequences for all architectures is in this one
-   file.  This is because this file must be stand-alone, and we don't
-   want to have multiple files.
-
-   For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
-   value gets put in the return slot, so that everything works when
-   this is executed not under Valgrind.  Args are passed in a memory
-   block, and so there's no intrinsic limit to the number that could
-   be passed, but it's currently five.
-   
-   The macro args are: 
-      _zzq_rlval    result lvalue
-      _zzq_default  default value (result returned when running on real CPU)
-      _zzq_request  request code
-      _zzq_arg1..5  request params
-
-   The other two macros are used to support function wrapping, and are
-   a lot simpler.  VALGRIND_GET_NR_CONTEXT returns the value of the
-   guest's NRADDR pseudo-register and whatever other information is
-   needed to safely run the call original from the wrapper: on
-   ppc64-linux, the R2 value at the divert point is also needed.  This
-   information is abstracted into a user-visible type, OrigFn.
-
-   VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
-   guest, but guarantees that the branch instruction will not be
-   redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
-   branch-and-link-to-r11.  VALGRIND_CALL_NOREDIR is just text, not a
-   complete inline asm, since it needs to be combined with more magic
-   inline asm stuff to be useful.
-*/
-
-/* ------------------------- x86-{linux,darwin} ---------------- */
-
-#if defined(PLAT_x86_linux)  ||  defined(PLAT_x86_darwin)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "roll $3,  %%edi ; roll $13, %%edi\n\t"      \
-                     "roll $29, %%edi ; roll $19, %%edi\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-  { volatile unsigned int _zzq_args[6];                           \
-    volatile unsigned int _zzq_result;                            \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %EDX = client_request ( %EAX ) */         \
-                     "xchgl %%ebx,%%ebx"                          \
-                     : "=d" (_zzq_result)                         \
-                     : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    volatile unsigned int __addr;                                 \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %EAX = guest_NRADDR */                    \
-                     "xchgl %%ecx,%%ecx"                          \
-                     : "=a" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_CALL_NOREDIR_EAX                                 \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* call-noredir *%EAX */                     \
-                     "xchgl %%edx,%%edx\n\t"
-#endif /* PLAT_x86_linux || PLAT_x86_darwin */
-
-/* ------------------------ amd64-{linux,darwin} --------------- */
-
-#if defined(PLAT_amd64_linux)  ||  defined(PLAT_amd64_darwin)
-
-typedef
-   struct { 
-      unsigned long long int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rolq $3,  %%rdi ; rolq $13, %%rdi\n\t"      \
-                     "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-  { volatile unsigned long long int _zzq_args[6];                 \
-    volatile unsigned long long int _zzq_result;                  \
-    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
-    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
-    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
-    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
-    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
-    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %RDX = client_request ( %RAX ) */         \
-                     "xchgq %%rbx,%%rbx"                          \
-                     : "=d" (_zzq_result)                         \
-                     : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    volatile unsigned long long int __addr;                       \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %RAX = guest_NRADDR */                    \
-                     "xchgq %%rcx,%%rcx"                          \
-                     : "=a" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_CALL_NOREDIR_RAX                                 \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* call-noredir *%RAX */                     \
-                     "xchgq %%rdx,%%rdx\n\t"
-#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
-
-/* ------------------------ ppc32-linux ------------------------ */
-
-#if defined(PLAT_ppc32_linux)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned int  _zzq_args[6];                          \
-             unsigned int  _zzq_result;                           \
-             unsigned int* _zzq_ptr;                              \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile("mr 3,%1\n\t" /*default*/                    \
-                     "mr 4,%2\n\t" /*ptr*/                        \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1\n\t"                               \
-                     "mr %0,3"     /*result*/                     \
-                     : "=b" (_zzq_result)                         \
-                     : "b" (_zzq_default), "b" (_zzq_ptr)         \
-                     : "cc", "memory", "r3", "r4");               \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    unsigned int __addr;                                          \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory", "r3"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-#endif /* PLAT_ppc32_linux */
-
-/* ------------------------ ppc64-linux ------------------------ */
-
-#if defined(PLAT_ppc64_linux)
-
-typedef
-   struct { 
-      unsigned long long int nraddr; /* where's the code? */
-      unsigned long long int r2;  /* what tocptr do we need? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
-                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned long long int  _zzq_args[6];                \
-    register unsigned long long int  _zzq_result __asm__("r3");   \
-    register unsigned long long int* _zzq_ptr __asm__("r4");      \
-    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
-    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
-    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
-    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
-    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
-    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1"                                   \
-                     : "=r" (_zzq_result)                         \
-                     : "0" (_zzq_default), "r" (_zzq_ptr)         \
-                     : "cc", "memory");                           \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    register unsigned long long int __addr __asm__("r3");         \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2"                                   \
-                     : "=r" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR_GPR2 */                \
-                     "or 4,4,4"                                   \
-                     : "=r" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->r2 = __addr;                                       \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-
-#endif /* PLAT_ppc64_linux */
-
-/* ------------------------- arm-linux ------------------------- */
-
-#if defined(PLAT_arm_linux)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-            "mov r12, r12, ror #3  ; mov r12, r12, ror #13 \n\t"  \
-            "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  { volatile unsigned int  _zzq_args[6];                          \
-    volatile unsigned int  _zzq_result;                           \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    __asm__ volatile("mov r3, %1\n\t" /*default*/                 \
-                     "mov r4, %2\n\t" /*ptr*/                     \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* R3 = client_request ( R4 ) */             \
-                     "orr r10, r10, r10\n\t"                      \
-                     "mov %0, r3"     /*result*/                  \
-                     : "=r" (_zzq_result)                         \
-                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
-                     : "cc","memory", "r3", "r4");                \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    unsigned int __addr;                                          \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* R3 = guest_NRADDR */                      \
-                     "orr r11, r11, r11\n\t"                      \
-                     "mov %0, r3"                                 \
-                     : "=r" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory", "r3"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                    \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R4 */        \
-                     "orr r12, r12, r12\n\t"
-
-#endif /* PLAT_arm_linux */
-
-/* ------------------------ ppc32-aix5 ------------------------- */
-
-#if defined(PLAT_ppc32_aix5)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-      unsigned int r2;  /* what tocptr do we need? */
-   }
-   OrigFn;
-
-/* Four-instruction marker (rlwinm on r0 with rotate counts 3, 13, 29
-   and 19) that prefixes every special sequence in this ppc32-aix5
-   section.  The single instruction that follows it in each user
-   ("or 1,1,1", "or 2,2,2", ...) selects the operation. */
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
-
-/* Issue a client request on ppc32-aix5.
-
-   The request code and the five arguments go into _zzq_args[0..5]
-   and the default value into _zzq_args[6].  The asm then sets r4 to
-   the array address, loads r3 from offset 24 off r4 (slot 6 when
-   unsigned int is 4 bytes wide), runs the preamble plus "or 1,1,1",
-   and finally copies r3 into _zzq_result, which is assigned to
-   _zzq_rlval.  r3, r4, cc and memory are declared as clobbered. */
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned int  _zzq_args[7];                          \
-    register unsigned int  _zzq_result;                           \
-    register unsigned int* _zzq_ptr;                              \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    _zzq_args[6] = (unsigned int)(_zzq_default);                  \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile("mr 4,%1\n\t"                                \
-                     "lwz 3, 24(4)\n\t"                           \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (_zzq_result)                         \
-                     : "b" (_zzq_ptr)                             \
-                     : "r3", "r4", "cc", "memory");               \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-/* Fill in both fields of _zzq_rlval (an OrigFn lvalue) on
-   ppc32-aix5, using two separate asm statements that share the
-   scratch variable __addr:
-     preamble + "or 2,2,2", then r3 is stored to nraddr
-     preamble + "or 4,4,4", then r3 is stored to r2
-   Each statement declares r3, cc and memory as clobbered. */
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    register unsigned int __addr;                                 \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR_GPR2 */                \
-                     "or 4,4,4\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->r2 = __addr;                                       \
-  }
-
-/* Assembly-text fragment only (no operands, no clobber list): the
-   ppc32-aix5 preamble followed by "or 3,3,3".  It is meant to be
-   pasted into the template of a larger asm statement, which must
-   place the target in R11 itself. */
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-
-#endif /* PLAT_ppc32_aix5 */
-
-/* ------------------------ ppc64-aix5 ------------------------- */
-
-#if defined(PLAT_ppc64_aix5)
-
-/* Context of an original function on ppc64-aix5: its code address
-   plus the r2 (TOC pointer) value to use with it.  Both fields are
-   written by VALGRIND_GET_NR_CONTEXT. */
-typedef
-   struct { 
-      unsigned long long int nraddr; /* where's the code? */
-      unsigned long long int r2;  /* what tocptr do we need? */
-   }
-   OrigFn;
-
-/* Four-instruction marker (rotldi on r0 by 3, 13, 61 and 51 bits)
-   that prefixes every special sequence in this ppc64-aix5 section.
-   The single instruction that follows it in each user ("or 1,1,1",
-   "or 2,2,2", ...) selects the operation. */
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
-                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
-
-/* Issue a client request on ppc64-aix5.
-
-   The request code and the five arguments go into _zzq_args[0..5]
-   and the default value into _zzq_args[6].  The asm then sets r4 to
-   the array address, loads r3 from offset 48 off r4 (slot 6 when the
-   element type is 8 bytes wide), runs the preamble plus "or 1,1,1",
-   and finally copies r3 into _zzq_result, which is assigned to
-   _zzq_rlval.  r3, r4, cc and memory are declared as clobbered.
-
-   The casts spell the type as "unsigned int long long"; that is the
-   same type as the "unsigned long long int" used for the array. */
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned long long int  _zzq_args[7];                \
-    register unsigned long long int  _zzq_result;                 \
-    register unsigned long long int* _zzq_ptr;                    \
-    _zzq_args[0] = (unsigned int long long)(_zzq_request);        \
-    _zzq_args[1] = (unsigned int long long)(_zzq_arg1);           \
-    _zzq_args[2] = (unsigned int long long)(_zzq_arg2);           \
-    _zzq_args[3] = (unsigned int long long)(_zzq_arg3);           \
-    _zzq_args[4] = (unsigned int long long)(_zzq_arg4);           \
-    _zzq_args[5] = (unsigned int long long)(_zzq_arg5);           \
-    _zzq_args[6] = (unsigned int long long)(_zzq_default);        \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile("mr 4,%1\n\t"                                \
-                     "ld 3, 48(4)\n\t"                            \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (_zzq_result)                         \
-                     : "b" (_zzq_ptr)                             \
-                     : "r3", "r4", "cc", "memory");               \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-/* Fill in both fields of _zzq_rlval (an OrigFn lvalue) on
-   ppc64-aix5, using two separate asm statements that share the
-   scratch variable __addr:
-     preamble + "or 2,2,2", then r3 is stored to nraddr
-     preamble + "or 4,4,4", then r3 is stored to r2
-   Each statement declares r3, cc and memory as clobbered. */
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    register unsigned long long int __addr;                       \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR_GPR2 */                \
-                     "or 4,4,4\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->r2 = __addr;                                       \
-  }
-
-/* Assembly-text fragment only (no operands, no clobber list): the
-   ppc64-aix5 preamble followed by "or 3,3,3".  It is meant to be
-   pasted into the template of a larger asm statement, which must
-   place the target in R11 itself. */
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-
-#endif /* PLAT_ppc64_aix5 */
-
-/* Insert assembly code for other platforms here... */
-
-#endif /* NVALGRIND */
-
-
-/* ------------------------------------------------------------------ */
-/* PLATFORM SPECIFICS for FUNCTION WRAPPING.  This is all very        */
-/* ugly.  It's the least-worst tradeoff I can think of.               */
-/* ------------------------------------------------------------------ */
-
-/* This section defines magic (a.k.a appalling-hack) macros for doing
-   guaranteed-no-redirection calls, so as to get from function
-   wrappers to the functions they are wrapping.  The whole point is to
-   construct standard call sequences, but to do the call itself with a
-   special no-redirect call pseudo-instruction that the JIT
-   understands and handles specially.  This section is long and
-   repetitious, and I can't see a way to make it shorter.
-
-   The naming scheme is as follows:
-
-      CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
-
-   'W' stands for "word" and 'v' for "void".  Hence there are
-   different macros for calling arity 0, 1, 2, 3, 4, etc, functions,
-   and for each, the possibility of returning a word-typed result, or
-   no result.
-*/
-
-/* Use these to write the name of your wrapper.  NOTE: duplicates
-   VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
-
-/* Paste four tokens into a single identifier.  This exists as a
-   separate macro so that callers' soname/fnname arguments are fully
-   macro-expanded before the ## operators are applied; operands that
-   appear directly next to ## would be pasted unexpanded. */
-#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd
-
-/* Expands to the identifier _vgwZU_<soname>_<fnname>, built with
-   VG_CONCAT4 so both arguments are macro-expanded first.
-   NOTE(review): the meaning of the "ZU" tag is not visible here;
-   check VG_WRAP_FUNCTION_ZU in pub_tool_redir.h. */
-#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname)                    \
-   VG_CONCAT4(_vgwZU_,soname,_,fnname)
-
-/* Expands to the identifier _vgwZZ_<soname>_<fnname>, built with
-   VG_CONCAT4 so both arguments are macro-expanded first.
-   NOTE(review): the meaning of the "ZZ" tag is not visible here;
-   check VG_WRAP_FUNCTION_ZZ in pub_tool_redir.h. */
-#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname)                    \
-   VG_CONCAT4(_vgwZZ_,soname,_,fnname)
-
-/* Use this macro from within a wrapper function to collect the
-   context (address and possibly other info) of the original function.
-   Once you have that you can then use it in one of the CALL_FN_
-   macros.  The argument _lval must be an lvalue of type OrigFn.  This
-   is a plain alias: it forwards _lval unchanged to the platform's
-   VALGRIND_GET_NR_CONTEXT. */
-#define VALGRIND_GET_ORIG_FN(_lval)  VALGRIND_GET_NR_CONTEXT(_lval)
-
-/* Derivatives of the main CALL_FN_W_* macros below, for calling
-   functions returning void.  Each one forwards to the CALL_FN_W_
-   macro of the same arity, passing a local volatile unsigned long
-   (_junk) as the result lvalue; the value stored there is never
-   read.  Only arities 0 to 7 are provided here. */
-
-#define CALL_FN_v_v(fnptr)                                        \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_v(_junk,fnptr); } while (0)
-
-#define CALL_FN_v_W(fnptr, arg1)                                  \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_W(_junk,fnptr,arg1); } while (0)
-
-#define CALL_FN_v_WW(fnptr, arg1,arg2)                            \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0)
-
-#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3)                      \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0)
-
-#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4)                \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0)
-
-#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5)             \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0)
-
-#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6)        \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0)
-
-#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7)   \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0)
-
-/* ------------------------- x86-{linux,darwin} ---------------- */
-
-#if defined(PLAT_x86_linux)  ||  defined(PLAT_x86_darwin)
-
-/* Registers trashed by the hidden call; this list is appended to the
-   clobber section of the CALL_FN_ asm statements below.  eax is
-   deliberately commented out: gcc can already see it is written
-   (it is the asm output operand), and naming it also causes gcc to
-   bomb. */
-#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
-
-/* These CALL_FN_ macros assume that on x86-{linux,darwin},
-   sizeof(unsigned long) == 4. */
-
-/* Call the zero-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] holds the target
-   address.  %eax enters the asm pointing at _argvec, is reloaded
-   with the target, and after VALGRIND_CALL_NOREDIR_EAX carries the
-   result out into _res, which is cast to the type of lval. */
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the one-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1] the argument, cast to unsigned long.  The
-   asm pushes the argument, loads the target into %eax, issues
-   VALGRIND_CALL_NOREDIR_EAX and then drops the 4 pushed bytes from
-   %esp.  The result comes back in %eax. */
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      __asm__ volatile(                                           \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $4, %%esp\n"                                       \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the two-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..2] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 8
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      __asm__ volatile(                                           \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $8, %%esp\n"                                       \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the three-argument function described by orig (an OrigFn)
-   and store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..3] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 12
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      __asm__ volatile(                                           \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $12, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the four-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..4] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 16
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      __asm__ volatile(                                           \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $16, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the five-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..5] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 20
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      __asm__ volatile(                                           \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $20, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the six-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..6] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 24
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      __asm__ volatile(                                           \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $24, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the seven-argument function described by orig (an OrigFn)
-   and store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..7] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 28
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      __asm__ volatile(                                           \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $28, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the eight-argument function described by orig (an OrigFn)
-   and store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..8] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 32
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      __asm__ volatile(                                           \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $32, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the nine-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..9] the arguments.  The asm pushes them last
-   argument first (so arg1 is pushed last), loads the target into
-   %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 36
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      __asm__ volatile(                                           \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $36, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the ten-argument function described by orig (an OrigFn) and
-   store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..10] the arguments.  The asm pushes them
-   last argument first (so arg1 is pushed last), loads the target
-   into %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 40
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      __asm__ volatile(                                           \
-         "pushl 40(%%eax)\n\t"                                    \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $40, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-/* Call the eleven-argument function described by orig (an OrigFn)
-   and store its word-sized result in lval.  _argvec[0] is the target
-   address and _argvec[1..11] the arguments.  The asm pushes them
-   last argument first (so arg1 is pushed last), loads the target
-   into %eax, issues VALGRIND_CALL_NOREDIR_EAX and then drops the 44
-   pushed bytes from %esp.  The result comes back in %eax. */
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
-                                  arg6,arg7,arg8,arg9,arg10,      \
-                                  arg11)                          \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      __asm__ volatile(                                           \
-         "pushl 44(%%eax)\n\t"                                    \
-         "pushl 40(%%eax)\n\t"                                    \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $44, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
-                                  arg6,arg7,arg8,arg9,arg10,      \
-                                  arg11,arg12)                    \
-   do { /* x86: call orig with 12 word args, result -> lval */    \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13]; /* target + 12 args */  \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr; /* call target */ \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      _argvec[12] = (unsigned long)(arg12);                       \
-      __asm__ volatile(                                           \
-         "pushl 48(%%eax)\n\t" /* arg12 goes on first */          \
-         "pushl 44(%%eax)\n\t"                                    \
-         "pushl 40(%%eax)\n\t"                                    \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t" /* arg1 goes on last */             \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $48, %%esp\n" /* drop the 12 pushed words */       \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* eax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_x86_linux || PLAT_x86_darwin */
-
-/* ------------------------ amd64-{linux,darwin} --------------- */
-
-#if defined(PLAT_amd64_linux)  ||  defined(PLAT_amd64_darwin)
-
-/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi",       \
-                            "rdi", "r8", "r9", "r10", "r11" /* rax left out; presumably since it is already the asm's "a" operand - TODO confirm */
-
-/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
-   long) == 8. */
-
-/* NB 9 Sept 07.  There is a nasty kludge here in all these CALL_FN_
-   macros.  In order not to trash the stack redzone, we need to drop
-   %rsp by 128 before the hidden call, and restore afterwards.  The
-   nastiness is that it is only by luck that the stack still appears
-   to be unwindable during the hidden call - since then the behaviour
-   of any routine using this macro does not match what the CFI data
-   says.  Sigh.
-
-   Why is this important?  Imagine that a wrapper has a stack
-   allocated local, and passes to the hidden call, a pointer to it.
-   Because gcc does not know about the hidden call, it may allocate
-   that local in the redzone.  Unfortunately the hidden call may then
-   trash it before it comes to use it.  So we must step clear of the
-   redzone, for the duration of the hidden call, to make it safe.
-
-   Probably the same problem afflicts the other redzone-style ABIs too
-   (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
-   self describing (none of this CFI nonsense) so at least messing
-   with the stack pointer doesn't give a danger of non-unwindable
-   stack. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do { /* amd64: call orig with no args, result -> lval */       \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1]; /* just the target */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do { /* amd64: call orig with 1 word arg, result -> lval */    \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2]; /* target + 1 arg */     \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do { /* amd64: call orig with 2 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3]; /* target + 2 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do { /* amd64: call orig with 3 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4]; /* target + 3 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do { /* amd64: call orig with 4 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5]; /* target + 4 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do { /* amd64: call orig with 5 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6]; /* target + 5 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do { /* amd64: call orig with 6 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7]; /* target + 6 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t" /* restore only after the call */  \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do { /* amd64: call orig with 7 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8]; /* target + 7 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "pushq 56(%%rax)\n\t" /* arg7 goes on the stack */       \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $8, %%rsp\n" /* drop the 1 pushed arg */           \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do { /* amd64: call orig with 8 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9]; /* target + 8 args */    \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "pushq 64(%%rax)\n\t" /* arg8 goes on first */           \
-         "pushq 56(%%rax)\n\t" /* arg7 goes on last */            \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $16, %%rsp\n" /* drop the 2 pushed args */         \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do { /* amd64: call orig with 9 word args, result -> lval */   \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10]; /* target + 9 args */   \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "pushq 72(%%rax)\n\t" /* arg9 goes on first */           \
-         "pushq 64(%%rax)\n\t"                                    \
-         "pushq 56(%%rax)\n\t" /* arg7 goes on last */            \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $24, %%rsp\n" /* drop the 3 pushed args */         \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do { /* amd64: call orig with 10 word args, result -> lval */  \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11]; /* target + 10 args */  \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "pushq 80(%%rax)\n\t" /* arg10 goes on first */          \
-         "pushq 72(%%rax)\n\t"                                    \
-         "pushq 64(%%rax)\n\t"                                    \
-         "pushq 56(%%rax)\n\t" /* arg7 goes on last */            \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $32, %%rsp\n" /* drop the 4 pushed args */         \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do { /* amd64: call orig with 11 word args, result -> lval */  \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12]; /* target + 11 args */  \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "pushq 88(%%rax)\n\t" /* arg11 goes on first */          \
-         "pushq 80(%%rax)\n\t"                                    \
-         "pushq 72(%%rax)\n\t"                                    \
-         "pushq 64(%%rax)\n\t"                                    \
-         "pushq 56(%%rax)\n\t" /* arg7 goes on last */            \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $40, %%rsp\n" /* drop the 5 pushed args */         \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do { /* amd64: call orig with 12 word args, result -> lval */  \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13]; /* target + 12 args */  \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      _argvec[12] = (unsigned long)(arg12);                       \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t" /* step clear of the redzone */    \
-         "pushq 96(%%rax)\n\t" /* arg12 goes on first */          \
-         "pushq 88(%%rax)\n\t"                                    \
-         "pushq 80(%%rax)\n\t"                                    \
-         "pushq 72(%%rax)\n\t"                                    \
-         "pushq 64(%%rax)\n\t"                                    \
-         "pushq 56(%%rax)\n\t" /* arg7 goes on last */            \
-         "movq 48(%%rax), %%r9\n\t" /* arg6 */                    \
-         "movq 40(%%rax), %%r8\n\t" /* arg5 */                    \
-         "movq 32(%%rax), %%rcx\n\t" /* arg4 */                   \
-         "movq 24(%%rax), %%rdx\n\t" /* arg3 */                   \
-         "movq 16(%%rax), %%rsi\n\t" /* arg2 */                   \
-         "movq 8(%%rax), %%rdi\n\t" /* arg1 */                    \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $48, %%rsp\n" /* drop the 6 pushed args */         \
-         "addq $128,%%rsp\n\t" /* undo the redzone skip */        \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0]) /* rax = &_argvec[0] */    \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
-
-/* ------------------------ ppc32-linux ------------------------ */
-
-#if defined(PLAT_ppc32_linux)
-
-/* This is useful for finding out about the on-stack stuff:
-
-   extern int f9  ( int,int,int,int,int,int,int,int,int );
-   extern int f10 ( int,int,int,int,int,int,int,int,int,int );
-   extern int f11 ( int,int,int,int,int,int,int,int,int,int,int );
-   extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int );
-
-   int g9 ( void ) {
-      return f9(11,22,33,44,55,66,77,88,99);
-   }
-   int g10 ( void ) {
-      return f10(11,22,33,44,55,66,77,88,99,110);
-   }
-   int g11 ( void ) {
-      return f11(11,22,33,44,55,66,77,88,99,110,121);
-   }
-   int g12 ( void ) {
-      return f12(11,22,33,44,55,66,77,88,99,110,121,132);
-   }
-*/
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* These CALL_FN_ macros assume that on ppc32-linux, 
-   sizeof(unsigned long) == 4. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-16\n\t"                                       \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,16\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      _argvec[10] = (unsigned long)arg10;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-16\n\t"                                       \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,12(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,16\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      _argvec[10] = (unsigned long)arg10;                         \
-      _argvec[11] = (unsigned long)arg11;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-32\n\t"                                       \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,16(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,12(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,32\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      _argvec[10] = (unsigned long)arg10;                         \
-      _argvec[11] = (unsigned long)arg11;                         \
-      _argvec[12] = (unsigned long)arg12;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-32\n\t"                                       \
-         /* arg12 */                                              \
-         "lwz 3,48(11)\n\t"                                       \
-         "stw 3,20(1)\n\t"                                        \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,16(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,12(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,32\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc32_linux */
-
-/* ------------------------ ppc64-linux ------------------------ */
-
-#if defined(PLAT_ppc64_linux)
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
-   long) == 8. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+0];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1] = (unsigned long)_orig.r2;                       \
-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+1];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+2];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+3];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+4];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+5];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+6];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+7];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+8];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+9];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-128\n\t"  /* expand stack frame */            \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,128"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+10];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-128\n\t"  /* expand stack frame */            \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,128"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+11];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-144\n\t"  /* expand stack frame */            \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,144"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+12];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      _argvec[2+12] = (unsigned long)arg12;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-144\n\t"  /* expand stack frame */            \
-         /* arg12 */                                              \
-         "ld  3,96(11)\n\t"                                       \
-         "std 3,136(1)\n\t"                                       \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,144"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc64_linux */
-
-/* ------------------------- arm-linux ------------------------- */
-
-#if defined(PLAT_arm_linux)
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14"
-
-/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned
-   long) == 4. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "mov %0, r0\n"                                           \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "mov %0, r0\n"                                           \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory",  __CALLER_SAVED_REGS         \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "mov %0, r0\n"                                           \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "mov %0, r0\n"                                           \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "push {r0} \n\t"                                         \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #4 \n\t"                                    \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "push {r0, r1} \n\t"                                     \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #8 \n\t"                                    \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "ldr r2, [%1, #28] \n\t"                                 \
-         "push {r0, r1, r2} \n\t"                                 \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #12 \n\t"                                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "ldr r2, [%1, #28] \n\t"                                 \
-         "ldr r3, [%1, #32] \n\t"                                 \
-         "push {r0, r1, r2, r3} \n\t"                             \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #16 \n\t"                                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "ldr r2, [%1, #28] \n\t"                                 \
-         "ldr r3, [%1, #32] \n\t"                                 \
-         "ldr r4, [%1, #36] \n\t"                                 \
-         "push {r0, r1, r2, r3, r4} \n\t"                         \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #20 \n\t"                                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #40] \n\t"                                 \
-         "push {r0} \n\t"                                         \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "ldr r2, [%1, #28] \n\t"                                 \
-         "ldr r3, [%1, #32] \n\t"                                 \
-         "ldr r4, [%1, #36] \n\t"                                 \
-         "push {r0, r1, r2, r3, r4} \n\t"                         \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #24 \n\t"                                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
-                                  arg6,arg7,arg8,arg9,arg10,      \
-                                  arg11)                          \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #40] \n\t"                                 \
-         "ldr r1, [%1, #44] \n\t"                                 \
-         "push {r0, r1} \n\t"                                     \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "ldr r2, [%1, #28] \n\t"                                 \
-         "ldr r3, [%1, #32] \n\t"                                 \
-         "ldr r4, [%1, #36] \n\t"                                 \
-         "push {r0, r1, r2, r3, r4} \n\t"                         \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #28 \n\t"                                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory",__CALLER_SAVED_REGS           \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
-                                  arg6,arg7,arg8,arg9,arg10,      \
-                                  arg11,arg12)                    \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      _argvec[12] = (unsigned long)(arg12);                       \
-      __asm__ volatile(                                           \
-         "ldr r0, [%1, #40] \n\t"                                 \
-         "ldr r1, [%1, #44] \n\t"                                 \
-         "ldr r2, [%1, #48] \n\t"                                 \
-         "push {r0, r1, r2} \n\t"                                 \
-         "ldr r0, [%1, #20] \n\t"                                 \
-         "ldr r1, [%1, #24] \n\t"                                 \
-         "ldr r2, [%1, #28] \n\t"                                 \
-         "ldr r3, [%1, #32] \n\t"                                 \
-         "ldr r4, [%1, #36] \n\t"                                 \
-         "push {r0, r1, r2, r3, r4} \n\t"                         \
-         "ldr r0, [%1, #4] \n\t"                                  \
-         "ldr r1, [%1, #8] \n\t"                                  \
-         "ldr r2, [%1, #12] \n\t"                                 \
-         "ldr r3, [%1, #16] \n\t"                                 \
-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
-         "add sp, sp, #32 \n\t"                                   \
-         "mov %0, r0"                                             \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_arm_linux */
-
-/* ------------------------ ppc32-aix5 ------------------------- */
-
-#if defined(PLAT_ppc32_aix5)
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* Expand the stack frame, copying enough info that unwinding
-   still works.  Trashes r3. */
-
-#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
-         "addi 1,1,-" #_n_fr "\n\t"                               \
-         "lwz  3," #_n_fr "(1)\n\t"                               \
-         "stw  3,0(1)\n\t"
-
-#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
-         "addi 1,1," #_n_fr "\n\t"
-
-/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
-   long) == 4. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+0];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1] = (unsigned long)_orig.r2;                       \
-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+1];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+2];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+3];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+4];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+5];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t" /* arg2->r4 */                       \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+6];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+7];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+8];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+9];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(64)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+10];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,60(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(64)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+11];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,64(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,60(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(72)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+12];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      _argvec[2+12] = (unsigned long)arg12;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
-         /* arg12 */                                              \
-         "lwz 3,48(11)\n\t"                                       \
-         "stw 3,68(1)\n\t"                                        \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,64(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,60(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(72)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc32_aix5 */
-
-/* ------------------------ ppc64-aix5 ------------------------- */
-
-#if defined(PLAT_ppc64_aix5)
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* Expand the stack frame, copying enough info that unwinding
-   still works.  Trashes r3. */
-
-#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
-         "addi 1,1,-" #_n_fr "\n\t"                               \
-         "ld   3," #_n_fr "(1)\n\t"                               \
-         "std  3,0(1)\n\t"
-
-#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
-         "addi 1,1," #_n_fr "\n\t"
-
-/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
-   long) == 8. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do { /* 0-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+0];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1] = (unsigned long)_orig.r2;                       \
-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do { /* 1-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+1];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do { /* 2-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+2];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do { /* 3-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+3];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do { /* 4-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+4];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do { /* 5-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+5];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do { /* 6-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+6];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do { /* 7-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+7];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do { /* 8-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+8];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do { /* 9-arg hidden call to _orig.nraddr; lval = r3 */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+9];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(128) /* r1 -= 128 */       \
-         /* arg9: _argvec[2+9] -> 112(r1) */                      \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(128) /* r1 += 128 */                \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do { /* 10-arg hidden call to _orig.nraddr; lval = r3 */       \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+10];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(128) /* r1 -= 128 */       \
-         /* arg10: _argvec[2+10] -> 120(r1) */                    \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9: _argvec[2+9] -> 112(r1) */                      \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(128) /* r1 += 128 */                \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do { /* 11-arg hidden call to _orig.nraddr; lval = r3 */       \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+11];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] */                   \
-         VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512 */       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(144) /* r1 -= 144 */       \
-         /* arg11: _argvec[2+11] -> 128(r1) */                    \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10: _argvec[2+10] -> 120(r1) */                    \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9: _argvec[2+9] -> 112(r1) */                      \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t" /* r11 = &_argvec[2] again */             \
-         "mr %0,3\n\t" /* _res = r3 */                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(144) /* r1 += 144 */                \
-         VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+12];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      _argvec[2+12] = (unsigned long)arg12;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
-         /* arg12 */                                              \
-         "ld  3,96(11)\n\t"                                       \
-         "std 3,136(1)\n\t"                                       \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(144)                                \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc64_aix5 */
-
-
-/* ------------------------------------------------------------------ */
-/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS.               */
-/*                                                                    */
-/* ------------------------------------------------------------------ */
-
-/* Some request codes.  There are many more of these, but most are not
-   exposed to end-user view.  These are the public ones, all of the
-   form 0x1000 + small_number.
-
-   Core ones are in the range 0x00000000--0x0000ffff.  The non-public
-   ones start at 0x2000.
-*/
-
-/* These macros are used by tools -- they must be public, but don't
-   embed them into other programs. */
-#define VG_USERREQ_TOOL_BASE(a,b) \
-   ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16))
-#define VG_IS_TOOL_USERREQ(a, b, v) \
-   (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000))
-
-/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! 
-   This enum comprises an ABI exported by Valgrind to programs
-   which use client requests.  DO NOT CHANGE THE ORDER OF THESE
-   ENTRIES, NOR DELETE ANY -- add new ones at the end. */
-typedef
-   enum { VG_USERREQ__RUNNING_ON_VALGRIND  = 0x1001,
-          VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002,
-
-          /* These allow any function to be called from the simulated
-             CPU but run on the real CPU.  Nb: the first arg passed to
-             the function is always the ThreadId of the running
-             thread!  So CLIENT_CALL0 actually requires a 1 arg
-             function, etc. */
-          VG_USERREQ__CLIENT_CALL0 = 0x1101,
-          VG_USERREQ__CLIENT_CALL1 = 0x1102,
-          VG_USERREQ__CLIENT_CALL2 = 0x1103,
-          VG_USERREQ__CLIENT_CALL3 = 0x1104,
-
-          /* Can be useful in regression testing suites -- eg. can
-             send Valgrind's output to /dev/null and still count
-             errors. */
-          VG_USERREQ__COUNT_ERRORS = 0x1201,
-
-          /* These are useful and can be interpreted by any tool that
-             tracks malloc() et al, by using vg_replace_malloc.c. */
-          VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
-          VG_USERREQ__FREELIKE_BLOCK   = 0x1302,
-          /* Memory pool support. */
-          VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
-          VG_USERREQ__DESTROY_MEMPOOL  = 0x1304,
-          VG_USERREQ__MEMPOOL_ALLOC    = 0x1305,
-          VG_USERREQ__MEMPOOL_FREE     = 0x1306,
-          VG_USERREQ__MEMPOOL_TRIM     = 0x1307,
-          VG_USERREQ__MOVE_MEMPOOL     = 0x1308,
-          VG_USERREQ__MEMPOOL_CHANGE   = 0x1309,
-          VG_USERREQ__MEMPOOL_EXISTS   = 0x130a,
-
-          /* Allow printfs to valgrind log. */
-          /* The first two pass the va_list argument by value, which
-             assumes it is the same size as or smaller than a UWord,
-             which generally isn't the case.  Hence are deprecated.
-             The second two pass the vargs by reference and so are
-             immune to this problem. */
-          /* both :: char* fmt, va_list vargs (DEPRECATED) */
-          VG_USERREQ__PRINTF           = 0x1401,
-          VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
-          /* both :: char* fmt, va_list* vargs */
-          VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403,
-          VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404,
-
-          /* Stack support. */
-          VG_USERREQ__STACK_REGISTER   = 0x1501,
-          VG_USERREQ__STACK_DEREGISTER = 0x1502,
-          VG_USERREQ__STACK_CHANGE     = 0x1503,
-
-          /* Wine support */
-          VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601
-   } Vg_ClientRequest;
-
-#if !defined(__GNUC__)
-#  define __extension__ /* */
-#endif
-
-/* Returns the number of Valgrinds this code is running under.  That
-   is, 0 if running natively, 1 if running under Valgrind, 2 if
-   running under Valgrind which is running under another Valgrind,
-   etc. */
-#define RUNNING_ON_VALGRIND  __extension__                        \
-   ({unsigned int _qzz_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */,          \
-                               VG_USERREQ__RUNNING_ON_VALGRIND,   \
-                               0, 0, 0, 0, 0);                    \
-    _qzz_res;                                                     \
-   })
-
-
-/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
-   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
-   since it provides a way to make sure valgrind will retranslate the
-   invalidated area.  Returns no value. */
-#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)         \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__DISCARD_TRANSLATIONS,  \
-                               _qzz_addr, _qzz_len, 0, 0, 0);     \
-   }
-
-
-/* These requests are for getting Valgrind itself to print something.
-   Possibly with a backtrace.  This is a really ugly hack.  The return value
-   is the number of characters printed, excluding the "**<pid>** " part at the
-   start and the backtrace (if present). */
-
-#if defined(NVALGRIND)
-
-#  define VALGRIND_PRINTF(...)
-#  define VALGRIND_PRINTF_BACKTRACE(...)
-
-#else /* NVALGRIND */
-
-/* Modern GCC will optimize the static routine out if unused,
-   and unused attribute will shut down warnings about it.  */
-static int VALGRIND_PRINTF(const char *format, ...)
-   __attribute__((format(__printf__, 1, 2), __unused__));
-static int
-VALGRIND_PRINTF(const char *format, ...)
-{
-   unsigned long _qzz_res;
-   va_list vargs;
-   va_start(vargs, format);
-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
-                              VG_USERREQ__PRINTF_VALIST_BY_REF,
-                              (unsigned long)format,
-                              (unsigned long)&vargs, 
-                              0, 0, 0);
-   va_end(vargs);
-   return (int)_qzz_res;
-}
-
-static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
-   __attribute__((format(__printf__, 1, 2), __unused__));
-static int
-VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
-{
-   unsigned long _qzz_res;
-   va_list vargs;
-   va_start(vargs, format);
-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
-                              VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
-                              (unsigned long)format,
-                              (unsigned long)&vargs, 
-                              0, 0, 0);
-   va_end(vargs);
-   return (int)_qzz_res;
-}
-
-#endif /* NVALGRIND */
-
-
-/* These requests allow control to move from the simulated CPU to the
-   real CPU, calling an arbitary function.
-   
-   Note that the current ThreadId is inserted as the first argument.
-   So this call:
-
-     VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
-
-   requires f to have this signature:
-
-     Word f(Word tid, Word arg1, Word arg2)
-
-   where "Word" is a word-sized type.
-
-   Note that these client requests are not entirely reliable.  For example,
-   if you call a function with them that subsequently calls printf(),
-   there's a high chance Valgrind will crash.  Generally, your prospects of
-   these working are made higher if the called function does not refer to
-   any global variables, and does not refer to any libc or other functions
-   (printf et al).  Any kind of entanglement with libc or dynamic linking is
-   likely to have a bad outcome, for tricky reasons which we've grappled
-   with a lot in the past.
-*/
-#define VALGRIND_NON_SIMD_CALL0(_qyy_fn)                          \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL0,          \
-                               _qyy_fn,                           \
-                               0, 0, 0, 0);                       \
-    _qyy_res;                                                     \
-   })
-
-#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)               \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL1,          \
-                               _qyy_fn,                           \
-                               _qyy_arg1, 0, 0, 0);               \
-    _qyy_res;                                                     \
-   })
-
-#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2)    \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL2,          \
-                               _qyy_fn,                           \
-                               _qyy_arg1, _qyy_arg2, 0, 0);       \
-    _qyy_res;                                                     \
-   })
-
-#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL3,          \
-                               _qyy_fn,                           \
-                               _qyy_arg1, _qyy_arg2,              \
-                               _qyy_arg3, 0);                     \
-    _qyy_res;                                                     \
-   })
-
-
-/* Counts the number of errors that have been recorded by a tool.  Nb:
-   the tool must record the errors with VG_(maybe_record_error)() or
-   VG_(unique_error)() for them to be counted. */
-#define VALGRIND_COUNT_ERRORS                                     \
-   __extension__                                                  \
-   ({unsigned int _qyy_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__COUNT_ERRORS,          \
-                               0, 0, 0, 0, 0);                    \
-    _qyy_res;                                                     \
-   })
-
-/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing
-   when heap blocks are allocated in order to give accurate results.  This
-   happens automatically for the standard allocator functions such as
-   malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete,
-   delete[], etc.
-
-   But if your program uses a custom allocator, this doesn't automatically
-   happen, and Valgrind will not do as well.  For example, if you allocate
-   superblocks with mmap() and then allocates chunks of the superblocks, all
-   Valgrind's observations will be at the mmap() level and it won't know that
-   the chunks should be considered separate entities.  In Memcheck's case,
-   that means you probably won't get heap block overrun detection (because
-   there won't be redzones marked as unaddressable) and you definitely won't
-   get any leak detection.
-
-   The following client requests allow a custom allocator to be annotated so
-   that it can be handled accurately by Valgrind.
-
-   VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated
-   by a malloc()-like function.  For Memcheck (an illustrative case), this
-   does two things:
-
-   - It records that the block has been allocated.  This means any addresses
-     within the block mentioned in error messages will be
-     identified as belonging to the block.  It also means that if the block
-     isn't freed it will be detected by the leak checker.
-
-   - It marks the block as being addressable and undefined (if 'is_zeroed' is
-     not set), or addressable and defined (if 'is_zeroed' is set).  This
-     controls how accesses to the block by the program are handled.
-   
-   'addr' is the start of the usable block (ie. after any
-   redzone), 'sizeB' is its size.  'rzB' is the redzone size if the allocator
-   can apply redzones -- these are blocks of padding at the start and end of
-   each block.  Adding redzones is recommended as it makes it much more likely
-   Valgrind will spot block overruns.  `is_zeroed' indicates if the memory is
-   zeroed (or filled with another predictable value), as is the case for
-   calloc().
-   
-   VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a
-   heap block -- that will be used by the client program -- is allocated.
-   It's best to put it at the outermost level of the allocator if possible;
-   for example, if you have a function my_alloc() which calls
-   internal_alloc(), and the client request is put inside internal_alloc(),
-   stack traces relating to the heap block will contain entries for both
-   my_alloc() and internal_alloc(), which is probably not what you want.
-
-   For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out
-   custom blocks from within a heap block, B, that has been allocated with
-   malloc/calloc/new/etc, then block B will be *ignored* during leak-checking
-   -- the custom blocks will take precedence.
-
-   VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK.  For
-   Memcheck, it does two things:
-
-   - It records that the block has been deallocated.  This assumes that the
-     block was annotated as having been allocated via
-     VALGRIND_MALLOCLIKE_BLOCK.  Otherwise, an error will be issued.
-
-   - It marks the block as being unaddressable.
-
-   VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a
-   heap block is deallocated.
-
-   In many cases, these two client requests will not be enough to get your
-   allocator working well with Memcheck.  More specifically, if your allocator
-   writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call
-   will be necessary to mark the memory as addressable just before the zeroing
-   occurs, otherwise you'll get a lot of invalid write errors.  For example,
-   you'll need to do this if your allocator recycles freed blocks, but it
-   zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK).
-   Alternatively, if your allocator reuses freed blocks for allocator-internal
-   data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary.
-
-   Really, what's happening is a blurring of the lines between the client
-   program and the allocator... after VALGRIND_FREELIKE_BLOCK is called, the
-   memory should be considered unaddressable to the client program, but the
-   allocator knows more than the rest of the client program and so may be able
-   to safely access it.  Extra client requests are necessary for Valgrind to
-   understand the distinction between the allocator and the rest of the
-   program.
-
-   Note: there is currently no VALGRIND_REALLOCLIKE_BLOCK client request;  it
-   has to be emulated with MALLOCLIKE/FREELIKE and memory copying.
-   
-   Ignored if addr == 0.
-*/
-#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)    \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MALLOCLIKE_BLOCK,      \
-                               addr, sizeB, rzB, is_zeroed, 0);   \
-   }
-
-/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
-   Ignored if addr == 0.
-*/
-#define VALGRIND_FREELIKE_BLOCK(addr, rzB)                        \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__FREELIKE_BLOCK,        \
-                               addr, rzB, 0, 0, 0);               \
-   }
-
-/* Create a memory pool. */
-#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed)             \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__CREATE_MEMPOOL,        \
-                               pool, rzB, is_zeroed, 0, 0);       \
-   }
-
-/* Destroy a memory pool. */
-#define VALGRIND_DESTROY_MEMPOOL(pool)                            \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__DESTROY_MEMPOOL,       \
-                               pool, 0, 0, 0, 0);                 \
-   }
-
-/* Associate a piece of memory with a memory pool. */
-#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size)                  \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_ALLOC,         \
-                               pool, addr, size, 0, 0);           \
-   }
-
-/* Disassociate a piece of memory from a memory pool. */
-#define VALGRIND_MEMPOOL_FREE(pool, addr)                         \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_FREE,          \
-                               pool, addr, 0, 0, 0);              \
-   }
-
-/* Disassociate any pieces outside a particular range. */
-#define VALGRIND_MEMPOOL_TRIM(pool, addr, size)                   \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_TRIM,          \
-                               pool, addr, size, 0, 0);           \
-   }
-
-/* Resize and/or move a piece associated with a memory pool. */
-#define VALGRIND_MOVE_MEMPOOL(poolA, poolB)                       \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MOVE_MEMPOOL,          \
-                               poolA, poolB, 0, 0, 0);            \
-   }
-
-/* Resize and/or move a piece associated with a memory pool. */
-#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size)         \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_CHANGE,        \
-                               pool, addrA, addrB, size, 0);      \
-   }
-
-/* Return 1 if a mempool exists, else 0. */
-#define VALGRIND_MEMPOOL_EXISTS(pool)                             \
-   __extension__                                                  \
-   ({unsigned int _qzz_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_EXISTS,        \
-                               pool, 0, 0, 0, 0);                 \
-    _qzz_res;                                                     \
-   })
-
-/* Mark a piece of memory as being a stack. Returns a stack id. */
-#define VALGRIND_STACK_REGISTER(start, end)                       \
-   __extension__                                                  \
-   ({unsigned int _qzz_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__STACK_REGISTER,        \
-                               start, end, 0, 0, 0);              \
-    _qzz_res;                                                     \
-   })
-
-/* Unmark the piece of memory associated with a stack id as being a
-   stack. */
-#define VALGRIND_STACK_DEREGISTER(id)                             \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__STACK_DEREGISTER,      \
-                               id, 0, 0, 0, 0);                   \
-   }
-
-/* Change the start and end address of the stack id. */
-#define VALGRIND_STACK_CHANGE(id, start, end)                     \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__STACK_CHANGE,          \
-                               id, start, end, 0, 0);             \
-   }
-
-/* Load PDB debug info for Wine PE image_map. */
-#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta)   \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__LOAD_PDB_DEBUGINFO,    \
-                               fd, ptr, total_size, delta, 0);    \
-   }
-
-
-#undef PLAT_x86_linux
-#undef PLAT_amd64_linux
-#undef PLAT_ppc32_linux
-#undef PLAT_ppc64_linux
-#undef PLAT_arm_linux
-#undef PLAT_ppc32_aix5
-#undef PLAT_ppc64_aix5
-
-#endif   /* __VALGRIND_H */
diff --git a/main/Android.mk b/main/Android.mk
index 744b6ed..ef26cd5 100644
--- a/main/Android.mk
+++ b/main/Android.mk
@@ -245,7 +245,9 @@
 
 LOCAL_LDFLAGS := $(vex_ldflags)
 
+# TODO: split asflags out from cflags; until then LOCAL_ASFLAGS below simply reuses common_cflags.
 LOCAL_CFLAGS := $(common_cflags)
+LOCAL_ASFLAGS := $(common_cflags)
 
 include $(BUILD_STATIC_LIBRARY)
 
diff --git a/main/VEX/priv/guest_arm_toIR.c b/main/VEX/priv/guest_arm_toIR.c
index 22aa3db..220c50f 100644
--- a/main/VEX/priv/guest_arm_toIR.c
+++ b/main/VEX/priv/guest_arm_toIR.c
@@ -12357,14 +12357,30 @@
       UInt size = bSX == 0 ? 16 : 32;
       Int frac_bits = size - ((imm4 << 1) | bI);
       UInt d = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
-      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op && size == 32) {
-         /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
+      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && size == 32) {
+         /* dp_op == 0 : VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
+         /* dp_op == 1 : VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
          /* This generates really horrible code.  We could potentially
             do much better. */
          IRTemp rmode = newTemp(Ity_I32);
          assign(rmode, mkU32(Irrm_NEAREST)); // rmode that this insn is defd to use
          IRTemp src32 = newTemp(Ity_I32);
-         assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
+         if (dp_op == 0) {
+            assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
+         } else {
+            /* Example code sequence using vcvt.f64.s32.  The s32 value is
+               written to s14, but the instruction names d7 (s14 is the low
+               half of d7), so the source must be read with getDReg rather
+               than getFReg.  Because the conversion is from s32 to f64, the
+               low 32 bits of the 64-bit register value are extracted here.
+
+               81a0:       ee07 2a10       vmov            s14, r2
+               81a4:       eeba 7bef       vcvt.f64.s32    d7, d7, #1
+             */
+            IRTemp src64 = newTemp(Ity_I64);
+            assign(src64,  unop(Iop_ReinterpF64asI64, getDReg(d)));
+            assign(src32, unop(Iop_64to32, mkexpr(src64)));
+         }
          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
                                 mkexpr(src32 ) );
          IRTemp scale = newTemp(Ity_F64);
@@ -12373,10 +12389,16 @@
          IRExpr* resF64 = triop(Iop_DivF64,
                                 rm, as_F64, 
                                 triop(Iop_AddF64, rm, mkexpr(scale), mkexpr(scale)));
-         IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
-         putFReg(d, resF32, condT);
-         DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
-             unsyned ? 'u' : 's', d, d, frac_bits);
+         if (dp_op == 0) {
+            IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
+            putFReg(d, resF32, condT);
+            DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
+                unsyned ? 'u' : 's', d, d, frac_bits);
+         } else {
+            putDReg(d, resF64, condT);
+            DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
+                unsyned ? 'u' : 's', d, d, frac_bits);
+         }
          goto decode_success_vfp;
       }
       /* fall through */
@@ -13664,6 +13686,51 @@
       /* fall through */
    }
 
+   /* --------------------- Integer Divides --------------------- */
+   // SDIV Rd, Rn, Rm (ARM): signed 32-bit divide, Rd = Rn / Rm
+   if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
+       && INSN(15,12) == BITS4(1,1,1,1)
+       && INSN(7,4) == BITS4(0,0,0,1)) {
+      UInt rD = INSN(19,16);   /* destination */
+      UInt rM = INSN(11,8);    /* divisor (argR) */
+      UInt rN = INSN(3,0);     /* dividend (argL) */
+      if (rD == 15 || rM == 15 || rN == 15) {
+         /* r15 in any position is Unpredictable; don't decode; fall through */
+      } else {
+         IRTemp res = newTemp(Ity_I32);
+         IRTemp argL  = newTemp(Ity_I32);
+         IRTemp argR  = newTemp(Ity_I32);
+         assign(argL, getIRegA(rN));
+         assign(argR, getIRegA(rM));
+         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
+         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);   /* write is guarded by condT */
+         DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+    }
+
+   // UDIV Rd, Rn, Rm (ARM): unsigned 32-bit divide, Rd = Rn / Rm
+   if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
+       && INSN(15,12) == BITS4(1,1,1,1)
+       && INSN(7,4) == BITS4(0,0,0,1)) {
+      UInt rD = INSN(19,16);   /* destination */
+      UInt rM = INSN(11,8);    /* divisor (argR) */
+      UInt rN = INSN(3,0);     /* dividend (argL) */
+      if (rD == 15 || rM == 15 || rN == 15) {
+         /* r15 in any position is Unpredictable; don't decode; fall through */
+      } else {
+         IRTemp res = newTemp(Ity_I32);
+         IRTemp argL  = newTemp(Ity_I32);
+         IRTemp argR  = newTemp(Ity_I32);
+         assign(argL, getIRegA(rN));
+         assign(argR, getIRegA(rM));
+         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
+         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);   /* write is guarded by condT */
+         DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+    }
+
    // MLA, MLS
    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
        && INSN(7,4) == BITS4(1,0,0,1)) {
@@ -18112,6 +18179,44 @@
       }
    }
 
+   /* -------------- SDIV.W Rd, Rn, Rm (signed 32-bit divide) -------------- */
+   if (INSN0(15,4) == 0xFB9
+       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
+      UInt rN = INSN0(3,0);    /* dividend (argL) */
+      UInt rD = INSN1(11,8);   /* destination */
+      UInt rM = INSN1(3,0);    /* divisor (argR) */
+      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {   /* else: not decoded here */
+         IRTemp res = newTemp(Ity_I32);
+         IRTemp argL  = newTemp(Ity_I32);
+         IRTemp argR  = newTemp(Ity_I32);
+         assign(argL, getIRegT(rN));
+         assign(argR, getIRegT(rM));
+         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
+         putIRegT(rD, mkexpr(res), condT);   /* write is guarded by condT */
+         DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+   }
+
+   /* -------------- UDIV.W Rd, Rn, Rm (unsigned 32-bit divide) -------------- */
+   if (INSN0(15,4) == 0xFBB
+       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
+      UInt rN = INSN0(3,0);    /* dividend (argL) */
+      UInt rD = INSN1(11,8);   /* destination */
+      UInt rM = INSN1(3,0);    /* divisor (argR) */
+      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {   /* else: not decoded here */
+         IRTemp res = newTemp(Ity_I32);
+         IRTemp argL  = newTemp(Ity_I32);
+         IRTemp argR  = newTemp(Ity_I32);
+         assign(argL, getIRegT(rN));
+         assign(argR, getIRegT(rM));
+         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
+         putIRegT(rD, mkexpr(res), condT);   /* write is guarded by condT */
+         DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
+         goto decode_success;
+      }
+   }
+
    /* ------------------ {U,S}MULL ------------------ */
    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
        && INSN1(7,4) == BITS4(0,0,0,0)) {
diff --git a/main/VEX/priv/host_arm_defs.c b/main/VEX/priv/host_arm_defs.c
index e428da0..6428b35 100644
--- a/main/VEX/priv/host_arm_defs.c
+++ b/main/VEX/priv/host_arm_defs.c
@@ -708,7 +708,7 @@
    }
 }
 
-HChar* showARMMulOp ( ARMMulOp op ) {
+HChar* showARMMulOp ( ARMMulDivOp op ) {
    switch (op) {
       case ARMmul_PLAIN: return "mul";
       case ARMmul_ZX:    return "umull";
@@ -717,6 +717,14 @@
    }
 }
 
+HChar* showARMDivOp ( ARMMulDivOp op ) {   /* mnemonic string for a divide op */
+   switch (op) {
+      case ARMdiv_S:     return "sdiv";
+      case ARMdiv_U:     return "udiv";
+      default: vpanic("showARMDivOp");   /* e.g. an ARMmul_* value: not a divide */
+   }
+}
+
 HChar* showARMVfpOp ( ARMVfpOp op ) {
    switch (op) {
       case ARMvfp_ADD: return "add";
@@ -1216,12 +1224,21 @@
    i->ARMin.Call.nArgRegs = nArgRegs;
    return i;
 }
-ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
+ARMInstr* ARMInstr_Mul ( ARMMulDivOp op ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag          = ARMin_Mul;
    i->ARMin.Mul.op = op;
    return i;
 }
+ARMInstr* ARMInstr_Div ( ARMMulDivOp op, HReg dst, HReg argL, HReg argR ) {
+   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+   i->tag          = ARMin_Div;
+   i->ARMin.Div.op = op;
+   i->ARMin.Div.dst  = dst;
+   i->ARMin.Div.argL = argL;
+   i->ARMin.Div.argR = argR;
+   return i;
+}
 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag             = ARMin_LdrEX;
@@ -1662,6 +1679,14 @@
             vex_printf("r1:r0, r2, r3");
          }
          return;
+      case ARMin_Div:
+         vex_printf("%-5s ", showARMDivOp(i->ARMin.Div.op));
+         ppHRegARM(i->ARMin.Div.dst);
+         vex_printf(", ");
+         ppHRegARM(i->ARMin.Div.argL);
+         vex_printf(", ");
+         ppHRegARM(i->ARMin.Div.argR);
+         return;
       case ARMin_LdrEX: {
          HChar* sz = "";
          switch (i->ARMin.LdrEX.szB) {
@@ -2083,6 +2108,11 @@
          if (i->ARMin.Mul.op != ARMmul_PLAIN)
             addHRegUse(u, HRmWrite, hregARM_R1());
          return;
+      case ARMin_Div:
+         addHRegUse(u, HRmWrite, i->ARMin.Div.dst);
+         addHRegUse(u, HRmRead, i->ARMin.Div.argL);
+         addHRegUse(u, HRmRead, i->ARMin.Div.argR);
+         return;
       case ARMin_LdrEX:
          addHRegUse(u, HRmRead, hregARM_R4());
          addHRegUse(u, HRmWrite, hregARM_R2());
@@ -2318,6 +2348,11 @@
          return;
       case ARMin_Mul:
          return;
+      case ARMin_Div:
+         i->ARMin.Div.dst = lookupHRegRemap(m, i->ARMin.Div.dst);
+         i->ARMin.Div.argL = lookupHRegRemap(m, i->ARMin.Div.argL);
+         i->ARMin.Div.argR = lookupHRegRemap(m, i->ARMin.Div.argR);
+         return;
       case ARMin_LdrEX:
          return;
       case ARMin_StrEX:
@@ -3303,6 +3338,16 @@
          }
          goto bad;
       }
+      case ARMin_Div: {
+         UInt subopc = i->ARMin.Div.op == ARMdiv_U ?
+                        X0011 : X0001;
+         UInt rD    = iregNo(i->ARMin.Div.dst);
+         UInt rN    = iregNo(i->ARMin.Div.argL);
+         UInt rM    = iregNo(i->ARMin.Div.argR);
+         UInt instr = XXXXXXXX(X1110, X0111, subopc, rD, 0xF, rM, X0001, rN);
+         *p++ = instr;
+         goto done;
+      }
       case ARMin_LdrEX: {
          /* E1D42F9F   ldrexb r2, [r4]
             E1F42F9F   ldrexh r2, [r4]
diff --git a/main/VEX/priv/host_arm_defs.h b/main/VEX/priv/host_arm_defs.h
index 91a6757..7c80bea 100644
--- a/main/VEX/priv/host_arm_defs.h
+++ b/main/VEX/priv/host_arm_defs.h
@@ -383,12 +383,15 @@
    enum {
       ARMmul_PLAIN=60,
       ARMmul_ZX,
-      ARMmul_SX
+      ARMmul_SX,
+      ARMdiv_S,
+      ARMdiv_U
    }
-   ARMMulOp;
+   ARMMulDivOp;
 
-extern HChar* showARMMulOp ( ARMMulOp op );
+extern HChar* showARMMulOp ( ARMMulDivOp op );
 
+extern HChar* showARMDivOp ( ARMMulDivOp op );
 
 typedef
    enum {
@@ -570,6 +573,7 @@
       ARMin_CMov,
       ARMin_Call,
       ARMin_Mul,
+      ARMin_Div,
       ARMin_LdrEX,
       ARMin_StrEX,
       /* vfp */
@@ -727,8 +731,15 @@
             complexity).  Hence hardwire it.  At least using caller-saves
             registers, which are less likely to be in use. */
          struct {
-            ARMMulOp op;
+            ARMMulDivOp op;
          } Mul;
+         /* ARMdiv_S/ARMdiv_U: signed/unsigned integer divides, respectively. */
+         struct {
+            ARMMulDivOp op;
+            HReg        dst;
+            HReg        argL;
+            HReg        argR;
+         } Div;
          /* LDREX{,H,B} r2, [r4]  and
             LDREXD r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
             Again, hardwired registers since this is not performance
@@ -958,7 +969,9 @@
                                       ARMCondCode cond, IRJumpKind jk );
 extern ARMInstr* ARMInstr_CMov     ( ARMCondCode, HReg dst, ARMRI84* src );
 extern ARMInstr* ARMInstr_Call     ( ARMCondCode, HWord, Int nArgRegs );
-extern ARMInstr* ARMInstr_Mul      ( ARMMulOp op );
+extern ARMInstr* ARMInstr_Mul      ( ARMMulDivOp op );
+extern ARMInstr* ARMInstr_Div      ( ARMMulDivOp op, HReg dst, HReg argL,
+                                     HReg argR );
 extern ARMInstr* ARMInstr_LdrEX    ( Int szB );
 extern ARMInstr* ARMInstr_StrEX    ( Int szB );
 extern ARMInstr* ARMInstr_VLdStD   ( Bool isLoad, HReg, ARMAModeV* );
diff --git a/main/VEX/priv/host_arm_isel.c b/main/VEX/priv/host_arm_isel.c
index 62739fd..13c1f2d 100644
--- a/main/VEX/priv/host_arm_isel.c
+++ b/main/VEX/priv/host_arm_isel.c
@@ -1193,6 +1193,19 @@
          default: break;
       }
 
+      /* SDIV/UDIV */
+      if (e->Iex.Binop.op == Iop_DivU32 || e->Iex.Binop.op == Iop_DivS32) {
+         HReg     dst  = newVRegI(env);
+         HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+
+         addInstr(env,
+                  ARMInstr_Div(e->Iex.Binop.op == Iop_DivU32 ?
+                                  ARMdiv_U : ARMdiv_S,
+                               dst, argL, argR));
+         return dst;
+      }
+
       /* SHL/SHR/SAR */
       switch (e->Iex.Binop.op) {
          case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
@@ -1889,7 +1902,7 @@
             HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
             HReg     tHi  = newVRegI(env);
             HReg     tLo  = newVRegI(env);
-            ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
+            ARMMulDivOp mop  = e->Iex.Binop.op == Iop_MullS32
                                ? ARMmul_SX : ARMmul_ZX;
             addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
             addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
diff --git a/main/config.h b/main/config.h
index dbda8ca..5f78be8 100644
--- a/main/config.h
+++ b/main/config.h
@@ -1,7 +1,8 @@
+/* config.h.  Generated from config.h.in by configure.  */
 /* config.h.in.  Generated from configure.in by autoheader.  */
 
-/* Define to 1 if you're using AIX 5.1 or 5.2 or 5.3 */
-/* #undef AIX5_LIBC */
+/* Define to 1 if you're using Bionic */
+/* #undef BIONIC_LIBC */
 
 /* DARWIN_VERS value for Mac OS X 10.5 */
 /* #undef DARWIN_10_5 */
@@ -12,6 +13,9 @@
 /* DARWIN_VERS value for Mac OS X 10.7 */
 /* #undef DARWIN_10_7 */
 
+/* DARWIN_VERS value for Mac OS X 10.8 */
+/* #undef DARWIN_10_8 */
+
 /* Define to 1 if you're using Darwin */
 /* #undef DARWIN_LIBC */
 
@@ -22,17 +26,29 @@
 /* #undef ENABLE_INNER */
 
 /* path to GDB */
-#define GDB_PATH "/system/bin/gdb"
+#define GDB_PATH "/usr/bin/gdb"
 
 /* Define to 1 if you're using glibc 2.10.x */
 /* #undef GLIBC_2_10 */
 
 /* Define to 1 if you're using glibc 2.11.x */
-/* #undef GLIBC_2_11 */
+/* #undef GLIBC_2_11 */
 
 /* Define to 1 if you're using glibc 2.12.x */
 /* #undef GLIBC_2_12 */
 
+/* Define to 1 if you're using glibc 2.13.x */
+/* #undef GLIBC_2_13 */
+
+/* Define to 1 if you're using glibc 2.14.x */
+/* #undef GLIBC_2_14 */
+
+/* Define to 1 if you're using glibc 2.15.x */
+/* #undef GLIBC_2_15 */
+
+/* Define to 1 if you're using glibc 2.16.x */
+/* #undef GLIBC_2_16 */
+
 /* Define to 1 if you're using glibc 2.2.x */
 /* #undef GLIBC_2_2 */
 
@@ -66,9 +82,14 @@
 /* Define to 1 if as supports mtocrf/mfocrf. */
 /* #undef HAVE_AS_PPC_MFTOCRF */
 
-/* Define to 1 if gcc supports __sync_bool_compare_and_swap() a.o. */
+/* Define to 1 if gcc supports __sync_bool_compare_and_swap() and
+   __sync_add_and_fetch() for the primary target */
 /* #undef HAVE_BUILTIN_ATOMIC */
 
+/* Define to 1 if g++ supports __sync_bool_compare_and_swap() and
+   __sync_add_and_fetch() */
+/* #undef HAVE_BUILTIN_ATOMIC_CXX */
+
 /* Define to 1 if you have the `clock_gettime' function. */
 #define HAVE_CLOCK_GETTIME 1
 
@@ -76,13 +97,13 @@
 #define HAVE_CLOCK_MONOTONIC 1
 
 /* Define to 1 if you have the <endian.h> header file. */
-/* #define HAVE_ENDIAN_H 1 -- already defined in AndroidConfig.h */
+/* #undef HAVE_ENDIAN_H */
 
 /* Define to 1 if you have the `epoll_create' function. */
 #define HAVE_EPOLL_CREATE 1
 
 /* Define to 1 if you have the `epoll_pwait' function. */
-/* #undef HAVE_EPOLL_PWAIT */
+#define HAVE_EPOLL_PWAIT 1
 
 /* Define to 1 if you have the `eventfd' function. */
 #define HAVE_EVENTFD 1
@@ -90,9 +111,6 @@
 /* Define to 1 if you have the `eventfd_read' function. */
 #define HAVE_EVENTFD_READ 1
 
-/* Define to 1 if you have the `floor' function. */
-#define HAVE_FLOOR 1
-
 /* Define to 1 if you have the `getpagesize' function. */
 #define HAVE_GETPAGESIZE 1
 
@@ -102,8 +120,11 @@
 /* Define to 1 if you have the `klogctl' function. */
 #define HAVE_KLOGCTL 1
 
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_LIBPTHREAD 1
+
 /* Define to 1 if you have the `rt' library (-lrt). */
-/* #undef HAVE_LIBRT */
+#define HAVE_LIBRT 1
 
 /* Define to 1 if you have the `mallinfo' function. */
 #define HAVE_MALLINFO 1
@@ -124,19 +145,25 @@
 #define HAVE_MMAP 1
 
 /* Define to 1 if you have the <mqueue.h> header file. */
-/* #undef HAVE_MQUEUE_H */
+#define HAVE_MQUEUE_H 1
 
 /* Define to 1 if you have the `mremap' function. */
 #define HAVE_MREMAP 1
 
 /* Define to 1 if you have the `ppoll' function. */
-/* #undef HAVE_PPOLL */
+#define HAVE_PPOLL 1
+
+/* Define to 1 if you have the `process_vm_readv' function. */
+/* #undef HAVE_PROCESS_VM_READV */
+
+/* Define to 1 if you have the `process_vm_writev' function. */
+/* #undef HAVE_PROCESS_VM_WRITEV */
 
 /* Define to 1 if you have the `pthread_barrier_init' function. */
 /* #undef HAVE_PTHREAD_BARRIER_INIT */
 
 /* Define to 1 if you have the `pthread_condattr_setclock' function. */
-/* #undef HAVE_PTHREAD_CONDATTR_SETCLOCK */
+#define HAVE_PTHREAD_CONDATTR_SETCLOCK 1
 
 /* Define to 1 if you have the `pthread_create@glibc2.0' function. */
 /* #undef HAVE_PTHREAD_CREATE_GLIBC_2_0 */
@@ -163,6 +190,9 @@
    constant. */
 #define HAVE_PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP 1
 
+/* Define to 1 if you have the `pthread_rwlock_t' type. */
+#define HAVE_PTHREAD_RWLOCK_T 1
+
 /* Define to 1 if you have the `pthread_rwlock_timedrdlock' function. */
 #define HAVE_PTHREAD_RWLOCK_TIMEDRDLOCK 1
 
@@ -173,26 +203,22 @@
 /* #undef HAVE_PTHREAD_SPIN_LOCK */
 
 /* Define to 1 if you have the `pthread_yield' function. */
-/* #undef HAVE_PTHREAD_YIELD */
-
-/* Define to 1 if the installed version of Qt4 provides QAtomicInt. */
-/* #undef HAVE_QTCORE_QATOMICINT */
-
-/* Define to 1 if the installed version of Qt4 provides QMutex::tryLock(int).
-   */
-/* #undef HAVE_QTCORE_QMUTEX_TRYLOCK_INT */
+#define HAVE_PTHREAD_YIELD 1
 
 /* Define to 1 if you have the `readlinkat' function. */
-/* #undef HAVE_READLINKAT */
+#define HAVE_READLINKAT 1
 
 /* Define to 1 if you have the `semtimedop' function. */
-/* #undef HAVE_SEMTIMEDOP */
+#define HAVE_SEMTIMEDOP 1
+
+/* Define to 1 if libstdc++ supports annotating shared pointers */
+/* #undef HAVE_SHARED_POINTER_ANNOTATION */
 
 /* Define to 1 if you have the `signalfd' function. */
-/* #undef HAVE_SIGNALFD */
+#define HAVE_SIGNALFD 1
 
 /* Define to 1 if you have the `sigwaitinfo' function. */
-/* #undef HAVE_SIGWAITINFO */
+#define HAVE_SIGWAITINFO 1
 
 /* Define to 1 if you have the <stdint.h> header file. */
 #define HAVE_STDINT_H 1
@@ -225,7 +251,7 @@
 #define HAVE_SYSCALL 1
 
 /* Define to 1 if you have the <sys/endian.h> header file. */
-#define HAVE_SYS_ENDIAN_H 1
+/* #undef HAVE_SYS_ENDIAN_H */
 
 /* Define to 1 if you have the <sys/epoll.h> header file. */
 #define HAVE_SYS_EPOLL_H 1
@@ -243,7 +269,7 @@
 #define HAVE_SYS_POLL_H 1
 
 /* Define to 1 if you have the <sys/signalfd.h> header file. */
-/* #undef HAVE_SYS_SIGNALFD_H */
+#define HAVE_SYS_SIGNALFD_H 1
 
 /* Define to 1 if you have the <sys/signal.h> header file. */
 #define HAVE_SYS_SIGNAL_H 1
@@ -260,24 +286,27 @@
 /* Define to 1 if you have the <sys/types.h> header file. */
 #define HAVE_SYS_TYPES_H 1
 
-/* Define to 1 if you have the `timerfd' function. */
-/* #undef HAVE_TIMERFD */
-
 /* can use __thread to define thread-local variables */
-/* #undef HAVE_TLS */
+#define HAVE_TLS 1
 
 /* Define to 1 if you have the <unistd.h> header file. */
 #define HAVE_UNISTD_H 1
 
+/* Define to 1 if you have a usable <linux/futex.h> header file. */
+#define HAVE_USABLE_LINUX_FUTEX_H 1
+
 /* Define to 1 if you have the `utimensat' function. */
-/* #undef HAVE_UTIMENSAT */
+#define HAVE_UTIMENSAT 1
 
 /* Define to 1 if you're using Linux 2.4.x */
 /* #undef KERNEL_2_4 */
 
-/* Define to 1 if you're using Linux 2.6.x */
+/* Define to 1 if you're using Linux 2.6.x or Linux 3.x */
 #define KERNEL_2_6 1
 
+/* configured default page size 4k */
+#define MIPS_PAGE_SHIFT 12
+
 /* Define to 1 if your C compiler doesn't accept -c and -o together. */
 /* #undef NO_MINUS_C_MINUS_O */
 
@@ -291,7 +320,7 @@
 #define PACKAGE_NAME "Valgrind"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "Valgrind 3.6.1"
+#define PACKAGE_STRING "Valgrind 3.8.1"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "valgrind"
@@ -300,10 +329,10 @@
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "3.6.1"
+#define PACKAGE_VERSION "3.8.1"
 
-/* Define as the return type of signal handlers (`int' or `void'). */
-#define RETSIGTYPE void
+/* configured memory alignment 2*PAGE_SIZE (parenthesised: safe in any expr) */
+#define SHM_ALIGNMENT (2*(1UL << MIPS_PAGE_SHIFT))
 
 /* Define to 1 if you have the ANSI C header files. */
 #define STDC_HEADERS 1
@@ -312,7 +341,7 @@
 #define TIME_WITH_SYS_TIME 1
 
 /* Version number of package */
-#define VERSION "3.6.1"
+#define VERSION "3.8.1"
 
 /* Temporary files directory */
 #define VG_TMPDIR "/data/local/tmp"
@@ -328,12 +357,3 @@
 
 /* Define to `int' if <sys/types.h> doesn't define. */
 /* #undef uid_t */
-
-/* path to egrep */
-#define EGREP_PATH "/system/bin/grep -E"
-
-/* path to sh */
-#define SH_PATH "/system/bin/sh"
-
-/* path to strings */
-#define STRINGS_PATH "/system/bin/strings"
diff --git a/main/coregrind/m_coredump/coredump-elf.c b/main/coregrind/m_coredump/coredump-elf.c
index 42a1965..066eaf3 100644
--- a/main/coregrind/m_coredump/coredump-elf.c
+++ b/main/coregrind/m_coredump/coredump-elf.c
@@ -136,17 +136,6 @@
    phdr->p_align = VKI_PAGE_SIZE;
 }
 
-#if defined(VGPV_arm_linux_android) || defined(VGPV_x86_linux_android)
-/* Android's libc doesn't provide a definition for this.  Hence: */
-typedef
-   struct {
-      Elf32_Word n_namesz;
-      Elf32_Word n_descsz;
-      Elf32_Word n_type;
-   }
-   Elf32_Nhdr;
-#endif
-
 struct note {
    struct note *next;
    ESZ(Nhdr) note;
diff --git a/main/coregrind/m_debuginfo/readelf.c b/main/coregrind/m_debuginfo/readelf.c
index d78dc7a..f5a94b4 100644
--- a/main/coregrind/m_debuginfo/readelf.c
+++ b/main/coregrind/m_debuginfo/readelf.c
@@ -1249,7 +1249,13 @@
          VG_(sprintf)(debugpath, "%s/.debug/%s", objdir, debugname);
          if ((addr = open_debug_file(debugpath, NULL, crc, rel_ok, &size)) == 0) {
             VG_(sprintf)(debugpath, "/usr/lib/debug%s/%s", objdir, debugname);
-            addr = open_debug_file(debugpath, NULL, crc, rel_ok, &size);
+            if ((addr = open_debug_file(debugpath, NULL, crc, rel_ok, &size)) == 0) {
+#if defined(VGPV_arm_linux_android) || defined(VGPV_x86_linux_android)
+               VG_(sprintf)(debugpath, "/data/local/symbols%s/%s", objdir,
+                            debugname);
+               addr = open_debug_file(debugpath, NULL, crc, rel_ok, &size);
+#endif
+            }
          }
       }
 
diff --git a/tsan/Android.mk b/tsan/Android.mk
deleted file mode 100644
index 7716bce..0000000
--- a/tsan/Android.mk
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (C) 2011 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-LOCAL_PATH:= $(call my-dir)
-
-ifneq ($(filter arm x86,$(TARGET_ARCH)),)
-
-common_cflags := \
-	-Wall -Wno-deprecated -fno-exceptions -fno-stack-protector \
-	-DTS_VALGRIND=1 \
-	-DTS_VERSION=\"exported\" \
-	-DVGA_$(TARGET_ARCH)=1 \
-	-DVGO_linux=1 \
-	-DVGP_$(TARGET_ARCH)_linux=1 \
-	-DVG_PLATFORM=\"$(TARGET_ARCH)-linux\" \
-	-D_STLP_NO_IOSTREAMS=1 \
-	-U_FORTIFY_SOURCE
-
-common_includes := \
-	external/valgrind/main \
-	external/valgrind/main/include \
-	external/valgrind/main/VEX/pub \
-	external/valgrind/dynamic_annotations
-
-ifeq ($(TARGET_ARCH),arm)
-tool_ldflags := -static -Wl,--build-id=none,-Ttext=0x38000000 -nodefaultlibs -nostartfiles -u _start -e_start
-else
-tool_ldflags := -static -Wl,-Ttext=0x38000000 -nodefaultlibs -nostartfiles -u _start -e_start
-endif
-
-preload_ldflags := -nodefaultlibs -Wl,-z,interpose,-z,initfirst
-# Remove this when the all toolchains are GCC 4.4
-ifeq ($(TARGET_ARCH),arm)
-  preload_ldflags += -Wl,--icf=none
-endif
-
-# TODO(eugenis): Add ts_event_names.h generation step
-
-# Build tsan-$(TARGET_ARCH)-linux
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := tsan-$(TARGET_ARCH)-linux
-LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/valgrind
-LOCAL_ARM_MODE := arm
-LOCAL_FORCE_STATIC_EXECUTABLE := true
-LOCAL_NO_CRT := true
-LOCAL_SYSTEM_SHARED_LIBRARIES :=
-LOCAL_CPP_EXTENSION := .cc
-LOCAL_SRC_FILES := \
-	thread_sanitizer.cc \
-	ts_valgrind.cc \
-	ts_valgrind_libc.cc \
-	ts_util.cc \
-	suppressions.cc \
-	ignore.cc \
-	common_util.cc \
-	ts_race_verifier.cc
-LOCAL_C_INCLUDES := \
-        bionic \
-        external/stlport/stlport \
-	$(common_includes)
-LOCAL_LDFLAGS := $(tool_ldflags)
-LOCAL_CFLAGS := $(common_cflags)
-LOCAL_CXXFLAGS := $(common_cxxflags)
-LOCAL_RTTI_FLAG := -fno-rtti
-LOCAL_STATIC_LIBRARIES := libcoregrind-$(TARGET_ARCH)-linux libvex-$(TARGET_ARCH)-linux
-
-include $(BUILD_EXECUTABLE)
-
-
-# Build vgpreload_tsan-$(TARGET_ARCH)-linux.so
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := vgpreload_tsan-$(TARGET_ARCH)-linux
-LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/valgrind
-LOCAL_ARM_MODE := arm
-LOCAL_STRIP_MODULE := false
-LOCAL_NO_CRT := true
-LOCAL_PRELINK_MODULE := false
-LOCAL_SRC_FILES := \
-	ts_valgrind_intercepts.c
-LOCAL_C_INCLUDES := $(common_includes)
-LOCAL_LDFLAGS := $(preload_ldflags)
-LOCAL_CFLAGS := $(common_cflags)
-LOCAL_RTTI_FLAG := -fno-rtti
-
-include $(BUILD_SHARED_LIBRARY)
-
-endif
diff --git a/tsan/MODULE_LICENSE_GPL b/tsan/MODULE_LICENSE_GPL
deleted file mode 100644
index e69de29..0000000
--- a/tsan/MODULE_LICENSE_GPL
+++ /dev/null
diff --git a/tsan/Makefile b/tsan/Makefile
deleted file mode 100644
index d63b3d4..0000000
--- a/tsan/Makefile
+++ /dev/null
@@ -1,297 +0,0 @@
-SVN_ROOT=..
-OPT=1
-
-include ../common.mk
-
-OUTDIR=bin
-VALGRIND_ROOT=../third_party/valgrind
-VALGRIND_INST_ROOT=../tsan_inst
-STLPORT_ROOT=../third_party/stlport
-GTEST_ROOT=../third_party/googletest
-OFFLINE=1
-
-TS_VERSION=$(shell svnversion)
-
-COPT=-O -fno-omit-frame-pointer
-CWARN=-Wall -Wmissing-prototypes -Wshadow -Wpointer-arith -Wstrict-prototypes \
-      -Wmissing-declarations -Wno-format-zero-length \
-      -fno-strict-aliasing -Wno-long-long -Wno-pointer-sign \
-      -Wdeclaration-after-statement
-CFLAGS=-g $(COPT) $(CWARN) -fno-strict-aliasing -fno-stack-protector -fpic
-CXXOPT=-O2
-CXXFLAGS=$(CXXOPT) -g -Wall -Wno-deprecated -fno-exceptions # -Wvla
-LDFLAGS=
-
-OFFLINE_DEFINES=-DTS_OFFLINE=1
-
-VG_CXXFLAGS=-fno-rtti -fno-stack-protector
-VG_DEFINES=-DVGA_$(ARCH)=1 -DVGO_$(OS)=1 -DVGP_$(ARCH_OS)=1 -D_STLP_NO_IOSTREAMS=1 -DTS_VALGRIND=1
-VG_INCLUDES=-I$(VALGRIND_ROOT) -I$(VALGRIND_ROOT)/include -I$(VALGRIND_ROOT)/VEX/pub -I$(STLPORT_ROOT)
-
-PIN_CXXFLAGS_L=-fPIC
-PIN_DEFINES=-DBIGARRAY_MULTIPLIER=1 -DUSING_XED -DTARGET_$(PIN_ARCH2)  -DHOST_$(PIN_ARCH2)   -DTARGET_LINUX -DTS_PIN=1
-PIN_INCLUDES=-I$(PIN_ROOT)/source/include -I$(PIN_ROOT)/source/include/gen \
-             -I$(PIN_ROOT)/extras/xed2-$(PIN_ARCH)/include -I$(PIN_ROOT)/extras/components/include
-PIN_LIBPATHS=-L$(PIN_ROOT)/extras/xed2-$(PIN_ARCH)/lib -L$(PIN_ROOT)/$(PIN_ARCH)/lib -L$(PIN_ROOT)/$(PIN_ARCH)/lib-ext
-PIN_LDFLAGS=-g -shared -Wl,-Bsymbolic -Wl,--version-script=$(PIN_ROOT)/source/include/pintool.ver
-PIN_LIBS= -lpin -lxed -ldwarf -lelf -ldl -lpthread
-
-DR_CXXFLAGS=-fPIC -fno-rtti -fno-stack-protector
-DR_DEFINES=-D$(DR_OS) -D$(DR_ARCH)
-DR_INCLUDES=-I$(DYNAMORIO_ROOT)/include
-DR_LDFLAGS=-shared -nostartfiles -nodefaultlibs -lgcc
-DR_LIBS=
-
-OS=   # linux, darwin
-ARCH= # x86, amd64
-D=    # <empty>, -debug
-ARCHOS=$(ARCH)-$(OS)
-ARCH_OS=$(ARCH)_$(OS)
-
-# prefix of binary files
-P=$(OUTDIR)/$(ARCHOS)$(D)-
-OFF=$(P)off-
-VGP=$(P)vg-
-PINP=$(P)pin-
-PINMTP=$(P)pinmp-
-DRP=$(P)dr-
-
-STRIP=strip
-
-INCLUDES=-I../dynamic_annotations
-DEFINES=-DTS_VERSION=\"$(TS_VERSION)\"
-
-ifeq ($(DEBUG), 1)
-  D=-debug
-  DEFINES+=-DDEBUG=1
-else
-  D=
-endif
-
-ifeq ($(ARCH), x86)
-  PIN_ARCH=ia32
-  PIN_ARCH2=IA32
-  DR_ARCH=X86_32
-else ifeq ($(ARCH), amd64)
-  PIN_ARCH=intel64
-  PIN_ARCH2=IA32E
-  DR_ARCH=X86_64
-  DR_LD_SCRIPT=-Xlinker -T -Xlinker ./dr64_ldscript # TODO(kcc) this is too hackish...
-endif
-
-ifeq ($(OS), linux)
-  PIN_CXXFLAGS=$(PIN_CXXFLAGS_L)
-  VG_LD_FLAGS=-Wl,--build-id=none -Wl,-Ttext=0x38000000 -static -nodefaultlibs -nostartfiles -u _start 
-  VG_LD_PRELOAD_FLAGS= -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
-  DR_OS=LINUX
-  STRIP=strip -g  # keep some symbols for profiling.
-else ifeq ($(OS), darwin)
-ifeq ($(ARCH), x86)
-  VG_LD_FLAGS=-Wl,-u,__start -Wl,-e,__start -Wl,-bind_at_load /usr/lib/dyld -arch i386 \
-      -Wl,-seg1addr,0xf0080000 -Wl,-stack_addr,0xf0080000 -Wl,-stack_size,0x80000 -Wl,-pagezero_size,0xf0000000 \
-      -nodefaultlibs -nostartfiles
-  VG_LD_PRELOAD_FLAGS=-arch i386 -dynamic -dynamiclib -all_load
-endif
-ifeq ($(ARCH), amd64)
-  VG_LD_FLAGS=-Wl,-u,__start -Wl,-e,__start -Wl,-bind_at_load /usr/lib/dyld -arch x86_64 \
-      -Wl,-seg1addr,0xf0080000 -Wl,-stack_addr,0xf0080000 -Wl,-stack_size,0x80000 -Wl,-pagezero_size,0xf0000000 \
-      -nodefaultlibs -nostartfiles
-  VG_LD_PRELOAD_FLAGS=-arch x86_64 -dynamic -dynamiclib -all_load
-endif
-  VG_CXXFLAGS+=-fno-weak -bind_at_load -fno-PIC
-else ifeq ($(OS), windows)
-  VALGRIND_ROOT= # no valgrind on windows :(
-  CXXFLAGS=/c /Zi /MT /EHs- /EHa- /wd4530 /D_CRT_SECURE_NO_DEPRECATE /D_SECURE_SCL=0 /D_HAS_ITERATOR_DEBUGGING=0 /nologo /Gy /Ox /GL
-  PIN_DEFINES=/DTARGET_WINDOWS /DBIGARRAY_MULTIPLIER=1 /DUSING_XED /DTARGET_IA32 /DHOST_IA32a /D_SECURE_SCL=0 -DTS_PIN=1
-  PIN_INCLUDES=/I$(PIN_ROOT)/source/include /I$(PIN_ROOT)/source/include/gen /I$(PIN_ROOT)/extras/xed2-ia32/include
-  PIN_LDFLAGS=/LTCG /DEBUG /DLL /EXPORT:main /NODEFAULTLIB /NOLOGO /INCREMENTAL:NO /OPT:REF /MACHINE:x86 /ENTRY:Ptrace_DllMainCRTStartup@12 /BASE:0x55000000
-  LDFLAGS=/LTCG
-  PIN_LIBPATHS= /LIBPATH:$(PIN_ROOT)/ia32/lib /LIBPATH:$(PIN_ROOT)/ia32/lib-ext /LIBPATH:$(PIN_ROOT)/extras/xed2-ia32/lib
-  PIN_LIBS=pin.lib libxed.lib libcpmt.lib libcmt.lib pinvm.lib kernel32.lib ntdll-32.lib winmm.lib
-  DR_OS=WINDOWS
-else
-  OS=UNKNOWN_OS
-endif
-
-
-
-VALGRIND_LIBS=$(VALGRIND_ROOT)/coregrind/libcoregrind-$(ARCHOS).a \
-		  $(VALGRIND_ROOT)/VEX/libvex-$(ARCHOS).a
-
-all: TS_valgrind TS_pin TS_offline TS_dynamorio test
-
-l: l32 l64
-lo: l32o l64o
-ld: l32d l64d
-l32: l32d l32o
-l64: l64d l64o
-
-
-l64d:
-	$(MAKE) all OS=linux ARCH=amd64 DEBUG=1
-l64o:
-	$(MAKE) all OS=linux ARCH=amd64 DEBUG=0
-l32d:
-	$(MAKE) all OS=linux ARCH=x86 DEBUG=1
-l32o:
-	$(MAKE) all OS=linux ARCH=x86 DEBUG=0
-la:
-	$(MAKE) all OS=linux ARCH=arm DEBUG=0
-
-w: w32
-w32: w32o w32d
-
-w32d:
-	$(MAKE) all OS=windows ARCH=x86 DEBUG=1
-
-w32o:
-	$(MAKE) all OS=windows ARCH=x86 DEBUG=0
-
-m: m32 m64
-m32: m32o m32d
-m64: m64o m64d
-
-m32o:
-	$(MAKE) all OS=darwin ARCH=x86 DEBUG=0
-m32d:
-	$(MAKE) all OS=darwin ARCH=x86 DEBUG=1
-m64o:
-	$(MAKE) all OS=darwin ARCH=amd64 DEBUG=0
-m64d:
-	$(MAKE) all OS=darwin ARCH=amd64 DEBUG=1
-
-install:
-	@echo The 'install' target is deprecated.
-
-self-contained: TS_valgrind
-	./mk-self-contained-valgrind.sh $(VALGRIND_INST_ROOT) tsan$(D) $(OUTDIR)/tsan-$(ARCHOS)$(D)-self-contained.sh
-self-contained-stripped: TS_valgrind
-	$(STRIP) $(OUTDIR)/tsan-*$(OS)
-	./mk-self-contained-valgrind.sh $(VALGRIND_INST_ROOT) tsan$(D) $(OUTDIR)/tsan-$(ARCHOS)$(D)-self-contained.sh
-
-TSAN_SFX_DIR=tsan-$(ARCHOS)
-sfx:
-	rm -rf $(TSAN_SFX_DIR)
-	mkdir -p $(TSAN_SFX_DIR)
-	cp -r $(PIN_ROOT)/ia32 \
-			bin/$(ARCHOS)-debug-ts_pin.dll bin/$(ARCHOS)-ts_pin.dll \
-			bin/$(ARCHOS)-ts_pinmt.dll \
-		  tsan.bat tsan-debug.bat tsan_mt.bat $(TSAN_SFX_DIR)
-	cp license_for_windows.txt $(TSAN_SFX_DIR)/LICENSE
-	7z a -sfx tsan-$(ARCHOS)-sfx.exe $(TSAN_SFX_DIR)
-	rm -rf $(TSAN_SFX_DIR)
-
-w32-sfx: w32o w32d
-	$(MAKE) sfx OS=windows ARCH=x86
-
-ifeq ($(VALGRIND_ROOT), )
-TS_valgrind:
-	@echo VALGRIND_ROOT is not set. Not building the Valgrind-based variant.
-else
-TS_valgrind:  $(OUTDIR)/tsan$D-$(ARCHOS) $(OUTDIR)/vgpreload_tsan$D-$(ARCHOS).so
-endif
-
-ifeq ($(PIN_ROOT), )
-TS_pin:
-	@echo PIN_ROOT is not set. Not building the PIN-based variant.
-else
-TS_pin: $(P)ts_pinmt.$(SO) $(P)ts_pin.$(SO)
-endif
-
-ifeq ($(DYNAMORIO_ROOT), )
-TS_dynamorio:
-	@echo DYNAMORIO_ROOT is not set. Not building the DynamoRio-based variant.
-else
-TS_dynamorio: $(P)ts_dynamorio.$(SO)
-endif
-
-
-ifeq ($(OFFLINE), 1)
-TS_offline: $(P)ts_offline$(EXE)
-else
-TS_offline:
-endif
-
-ifeq ($(GTEST_ROOT), )
-test:
-	@echo GTEST_ROOT is not set. Not building GTEST-based tests.
-else
-test: $(P)suppressions_test$(EXE) $(P)thread_sanitizer_test$(EXE)
-endif
-
-$(OUTDIR):
-	mkdir -p $(OUTDIR)
-
-TS_HEADERS=thread_sanitizer.h ts_util.h suppressions.h ignore.h ts_replace.h ts_heap_info.h \
-	   ts_simple_cache.h ts_stats.h ts_lock.h ts_events.h ts_event_names.h \
-	   ts_trace_info.h ts_race_verifier.h dense_multimap.h \
-           ts_atomic.h ts_atomic_int.h \
-	   ../dynamic_annotations/dynamic_annotations.h
-ts_event_names.h: ts_events.h
-	sed -n '/^enum/,/^};/ {s/enum EventType/static const char *kEventNames[] = /; s/^  \([A-Z_][A-Z_]*\)/  "\1"/g; p;}' $< > $@
-TS_VG_HEADERS=ts_valgrind.h ts_valgrind_client_requests.h
-
-TS_VG_OBJECTS=$(VGP)thread_sanitizer.o $(VGP)ts_valgrind.o $(VGP)ts_valgrind_libc.o $(VGP)ts_util.o $(VGP)suppressions.o $(VGP)ignore.o $(VGP)common_util.o $(VGP)ts_race_verifier.o $(VGP)ts_atomic.o
-TS_PIN_OBJECTS=$(PINP)ts_pin.$(OBJ) $(PINP)ts_util.$(OBJ) $(PINP)thread_sanitizer.$(OBJ) $(PINP)suppressions.$(OBJ) $(PINP)ignore.$(OBJ) $(PINP)common_util.$(OBJ) $(PINP)ts_race_verifier.$(OBJ) $(PINP)ts_atomic.$(OBJ)
-TS_PINMT_OBJECTS=$(PINMTP)ts_pin.$(OBJ) $(PINMTP)ts_util.$(OBJ) $(PINMTP)thread_sanitizer.$(OBJ) $(PINMTP)suppressions.$(OBJ) $(PINMTP)ignore.$(OBJ) $(PINMTP)common_util.$(OBJ) $(PINMTP)ts_race_verifier.$(OBJ) $(PINMTP)ts_atomic.$(OBJ)
-TS_OFFLINE_OBJECTS=$(OFF)ts_offline.$(OBJ) $(OFF)thread_sanitizer.$(OBJ) $(OFF)ts_util.$(OBJ) $(OFF)suppressions.$(OBJ) $(OFF)ignore.$(OBJ) $(OFF)common_util.$(OBJ) $(OFF)ts_atomic.$(OBJ)
-TS_DR_OBJECTS=$(DRP)ts_dynamorio.$(OBJ) $(DRP)ts_util.$(OBJ)
-
-$(P)%.$(OBJ): %.cc $(TS_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(ARCHFLAGS) $(O)$@ -c $< $(DEFINES) $(INCLUDES)
-
-$(OFF)%.$(OBJ): %.cc $(TS_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(ARCHFLAGS) $(OFFLINE_DEFINES) $(O)$@ -c $< $(DEFINES) $(INCLUDES)
-
-$(VGP)%.o: %.cc $(TS_HEADERS) $(TS_VG_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(VG_CXXFLAGS) $(ARCHFLAGS) $(VG_INCLUDES) $(VG_DEFINES) -o $@ -c $< $(DEFINES) $(INCLUDES)
-
-$(PINP)%.$(OBJ): %.cc $(TS_HEADERS) $(TS_PIN_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(PIN_CXXFLAGS) $(ARCHFLAGS) $(PIN_INCLUDES) $(PIN_DEFINES) $(O)$@ -c $< $(DEFINES) $(INCLUDES)
-
-$(PINMTP)%.$(OBJ): %.cc $(TS_HEADERS) $(TS_PIN_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(PIN_CXXFLAGS) $(ARCHFLAGS) $(PIN_INCLUDES) $(PIN_DEFINES) $(O)$@ -c $< $(DEFINES) $(INCLUDES) -DTS_SERIALIZED=0
-
-$(DRP)%.$(OBJ): %.cc $(TS_HEADERS) $(TS_DR_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(DR_CXXFLAGS) $(ARCHFLAGS) $(DR_INCLUDES) $(DR_DEFINES) $(O)$@ -c $< $(DEFINES) $(INCLUDES)
-
-$(P)gtest-%.$(OBJ): %.cc $(TS_HEADERS) | $(OUTDIR)
-	$(CXX) $(CXXFLAGS) $(ARCHFLAGS) -I$(GTEST_ROOT)/include $(O)$@ -c $<
-
-$(P)preload-%.o: %.c $(TS_HEADERS) $(TS_VG_HEADERS) | $(OUTDIR)
-	$(CC) $(CFLAGS) $(ARCHFLAGS) $(VG_INCLUDES) $(VG_DEFINES) -o $@ -c $<
-
-$(OUTDIR)/tsan$(D)-$(ARCHOS): $(TS_VG_OBJECTS)
-	$(LTLD) $(LDFLAGS) $(ARCHFLAGS) $(VG_LD_FLAGS) -o $@ $^ $(VALGRIND_LIBS) -lgcc
-	ln -sf `pwd`/$@ $(VALGRIND_INST_ROOT)/lib/valgrind/  # install the symlink into the valgrind inst dir.
-
-$(OUTDIR)/vgpreload_tsan$(D)-$(ARCHOS).so: $(P)preload-ts_valgrind_intercepts.o
-	$(LD) $(LDFLAGS)  $(ARCHFLAGS) $(VG_LD_PRELOAD_FLAGS) -o $@  $<
-	ln -sf `pwd`/$@  $(VALGRIND_INST_ROOT)/lib/valgrind/  # install the symlink into the valgrind inst dir.
-
-$(P)ts_offline$(EXE): $(TS_OFFLINE_OBJECTS)
-	$(LD) $(LDFLAGS) $(ARCHFLAGS) $(LINKO)$@ $^
-
-$(P)suppressions_test$(EXE): $(P)gtest-suppressions_test.$(OBJ) $(P)suppressions.$(OBJ) $(P)common_util.$(OBJ) $(P)ts_util.$(OBJ) $(GTEST_LIB)
-	$(LD) $(LDFLAGS) $(ARCHFLAGS) $(LINKO)$@ $^
-
-$(P)thread_sanitizer_test$(EXE): $(P)gtest-thread_sanitizer_test.$(OBJ) $(P)ts_util.$(OBJ) $(GTEST_LIB)
-	$(LD) $(LDFLAGS) $(ARCHFLAGS) $(LINKO)$@ $^
-
-$(P)ts_pin.so: $(TS_PIN_OBJECTS)
-	$(LD) $(ARCHFLAGS) $(PIN_LDFLAGS) $(PIN_LIBPATHS) -o $@ $^  $(PIN_LIBS)
-
-$(P)ts_pinmt.so: $(TS_PINMT_OBJECTS)
-	$(LD) $(ARCHFLAGS) $(PIN_LDFLAGS) $(PIN_LIBPATHS) -o $@ $^  $(PIN_LIBS)
-
-$(P)ts_pin.dll: $(TS_PIN_OBJECTS)
-	$(LD) $(ARCHFLAGS) $(PIN_LDFLAGS) $(PIN_LIBPATHS)  /IMPLIB:$(PINP)ts_pin.lib /PDB:$(PINP)ts_pin.pdb /OUT:$@  $^  $(PIN_LIBS)
-
-$(P)ts_pinmt.dll: $(TS_PINMT_OBJECTS)
-	$(LD) $(ARCHFLAGS) $(PIN_LDFLAGS) $(PIN_LIBPATHS)  /IMPLIB:$(PINMTP)ts_pin.lib /PDB:$(PINMTP)ts_pin.pdb /OUT:$@  $^  $(PIN_LIBS)
-
-$(P)ts_dynamorio.so: $(TS_DR_OBJECTS)
-	$(LD) $(ARCHFLAGS) $(DR_LDFLAGS) $(DR_LIBPATHS) -o $@ $^  $(DR_LIBS) $(DR_LD_SCRIPT)
-
-clean: GTEST_CLEAN
-	rm -rfv $(OUTDIR) ts_event_names.h core* pintool.log* pin.log $(VALGRIND_INST_ROOT)/lib/valgrind/*tsan*
diff --git a/tsan/README.txt b/tsan/README.txt
deleted file mode 100644
index a7b730c..0000000
--- a/tsan/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-http://code.google.com/p/data-race-test/wiki/ThreadSanitizer
diff --git a/tsan/common_util.cc b/tsan/common_util.cc
deleted file mode 100644
index 78fab3c..0000000
--- a/tsan/common_util.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Copyright (c) 2010-2011, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "common_util.h"
-
-bool StringMatch(const string& wildcard, const string& text) {
-  const char* c_text = text.c_str();
-  const char* c_wildcard = wildcard.c_str();
-  // Start of the current look-ahead. Everything before these positions is a
-  // definite, optimal match.
-  const char* c_text_last = NULL;
-  const char* c_wildcard_last = NULL;
-
-  char last_wc_char = wildcard[wildcard.size() - 1];
-
-  if (last_wc_char == '*' && wildcard.size() == 1) {
-    return true;  // '*' matches everything.
-  }
-
-  if (last_wc_char != '*' && last_wc_char != '?'
-      && last_wc_char != text[text.size() - 1]) {
-    // short cut for the case when the wildcard does not end with '*' or '?'
-    // and the last characters of wildcard and text do not match.
-    return false;
-  }
-
-  while (*c_text) {
-    if (*c_wildcard == '*') {
-      while (*++c_wildcard == '*') {
-        // Skip all '*'.
-      }
-      if (!*c_wildcard) {
-        // Ends with a series of '*'.
-        return true;
-      }
-      c_text_last = c_text;
-      c_wildcard_last = c_wildcard;
-    } else if ((*c_text == *c_wildcard) || (*c_wildcard == '?')) {
-      ++c_text;
-      ++c_wildcard;
-    } else if (c_text_last) {
-      // No match. But we have seen at least one '*', so rollback and try at the
-      // next position.
-      c_wildcard = c_wildcard_last;
-      c_text = c_text_last++;
-    } else {
-      return false;
-    }
-  }
-
-  // Skip all '*' at the end of the wildcard.
-  while (*c_wildcard == '*') {
-    ++c_wildcard;
-  }
-
-  return !*c_wildcard;
-}
-
-string ConvertToPlatformIndependentPath(const string &s) {
-  string ret = s;
-#ifdef _MSC_VER
-  // TODO(timurrrr): do we need anything apart from s/\\///g?
-  size_t it = 0;
-  while ((it = ret.find("\\", it)) != string::npos) {
-    ret.replace(it, 1, "/");
-  }
-#endif // _MSC_VER
-  return ret;
-}
-
-TS_FILE OpenFileReadOnly(const string &file_name, bool die_if_failed) {
-  TS_FILE ret = TS_FILE_INVALID;
-#ifdef TS_VALGRIND
-  SysRes sres = VG_(open)((const Char*)file_name.c_str(), VKI_O_RDONLY, 0);
-  if (!sr_isError(sres))
-    ret = sr_Res(sres);
-#elif defined(_MSC_VER)
-  ret = fopen(file_name.c_str(), "r");
-#else // no TS_VALGRIND
-  ret = open(file_name.c_str(), O_RDONLY);
-#endif
-  if (ret == TS_FILE_INVALID && die_if_failed) {
-    Printf("ERROR: can not open file %s\n", file_name.c_str());
-    exit(1);
-  }
-  return ret;
-}
-
-// Read the contents of a file to string. Valgrind version.
-string ReadFileToString(const string &file_name, bool die_if_failed) {
-  TS_FILE fd = OpenFileReadOnly(file_name, die_if_failed);
-  if (fd == TS_FILE_INVALID) {
-    return string();
-  }
-  char buff[257] = {0};
-  int n_read;
-  string res;
-  while ((n_read = read(fd, buff, sizeof(buff) - 1)) > 0) {
-    buff[n_read] = 0;
-    res.append(buff, n_read);
-  }
-  close(fd);
-  return res;
-}
diff --git a/tsan/common_util.h b/tsan/common_util.h
deleted file mode 100644
index 93eb7bc..0000000
--- a/tsan/common_util.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Copyright (c) 2010-2011, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TSAN_COMMON_UTIL__
-#define TSAN_COMMON_UTIL__
-
-#include "ts_util.h"
-
-#if defined(__GNUC__)
-  typedef int TS_FILE;
-  #define TS_FILE_INVALID (-1)
-#ifdef TS_LLVM
-  #define read(fd, buf, size) __real_read(fd, buf, size)
-#endif
-#elif defined(_MSC_VER)
-  typedef FILE *TS_FILE;
-  #define TS_FILE_INVALID (NULL)
-  #define read(fd, buf, size) fread(buf, 1, size, fd)
-  #define close fclose
-#endif
-
-bool StringMatch(const string& wildcard, const string& text);
-string ConvertToPlatformIndependentPath(const string &s);
-TS_FILE OpenFileReadOnly(const string &file_name, bool die_if_failed);
-string ReadFileToString(const string &file_name, bool die_if_failed);
-
-#endif
diff --git a/tsan/dense_multimap.h b/tsan/dense_multimap.h
deleted file mode 100644
index cc6ed24..0000000
--- a/tsan/dense_multimap.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-#ifndef TS_DENSE_MULTIMAP_
-#define TS_DENSE_MULTIMAP_
-
-#include "ts_util.h"
-
-// DenseMultimap is imilar to STL multimap, but optimized for memory.
-// DenseMultimap objects are immutable after creation.
-// All CTORs have linear complexity.
-template<class T, int kPreallocatedElements>
-class DenseMultimap {
- public:
-  typedef const T *const_iterator;
-
-  enum RemoveEnum {REMOVE};
-
-  // Create multimap {t1, t2}
-  DenseMultimap(const T &t1, const T &t2) {
-    Allocate(2);
-    if (t1 < t2) {
-      ptr_[0] = t1;
-      ptr_[1] = t2;
-    } else {
-      ptr_[0] = t2;
-      ptr_[1] = t1;
-    }
-    Validate();
-  }
-
-  // Create a copy of m.
-  DenseMultimap(const DenseMultimap &m) {
-    Allocate(m.size());
-    copy(m.begin(), m.end(), ptr_);
-    Validate();
-  }
-
-  // Create multimap m+{t}
-  DenseMultimap(const DenseMultimap &m, const T &t) {
-    Allocate(m.size() + 1);
-    const_iterator it = lower_bound(m.begin(), m.end(), t);
-    copy(m.begin(), it, ptr_);
-    ptr_[it - m.begin()] = t;
-    copy(it, m.end(), ptr_ + (it - m.begin()) + 1);
-    Validate();
-  }
-
-  // Create multimap m-{t}
-  DenseMultimap(const DenseMultimap &m, RemoveEnum remove, const T &t) {
-    const_iterator it = lower_bound(m.begin(), m.end(), t);
-    CHECK(it < m.end() && it >= m.begin());
-    Allocate(m.size() - 1);
-    copy(m.begin(), it, ptr_);
-    copy(it + 1, m.end(), ptr_ + (it - m.begin()));
-    Validate();
-  }
-
-  ~DenseMultimap() {
-    if (size_ > kPreallocatedElements) {
-      CHECK(ptr_ != (T*)&array_);
-      delete [] ptr_;
-    } else {
-      CHECK(ptr_ == (T*)&array_);
-    }
-  }
-
-  size_t size() const { return size_; }
-
-  const T &operator [] (size_t i) const {
-    CHECK(i < size());
-    return ptr_[i];
-  }
-
-  const_iterator begin() const { return ptr_; }
-  const_iterator end()   const { return ptr_ + size(); }
-
-  bool has(const T&t) const {
-    return binary_search(begin(), end(), t);
-  }
-
-  bool operator < (const DenseMultimap &m) const {
-    if (size() != m.size()) return size() < m.size();
-    for (size_t i = 0; i < size(); i++) {
-      if (ptr_[i] != m.ptr_[i])
-        return ptr_[i] < m.ptr_[i];
-    }
-    return false;
-  }
-
- private:
-
-  void Allocate(int required_size) {
-    size_ = required_size;
-    if (size_ <= kPreallocatedElements) {
-      ptr_ = (T*)&array_;
-    } else {
-      ptr_ = new T[size_];
-    }
-  }
-
-  void Validate() {
-    for (size_t i = 1; i < size(); i++) {
-      CHECK(ptr_[i-1] <= ptr_[i]);
-    }
-  }
-
-  T *ptr_;
-  int size_;
-  T array_[kPreallocatedElements];
-};
-
-#endif  // TS_DENSE_MULTIMAP_
diff --git a/tsan/dr64_ldscript b/tsan/dr64_ldscript
deleted file mode 100644
index 98c2647..0000000
--- a/tsan/dr64_ldscript
+++ /dev/null
@@ -1,218 +0,0 @@
-SECTIONS
-{
-  . = 0x72000000;
-  /* Read-only sections, merged into text segment: */
-  PROVIDE (__executable_start = 0x72000000); . = 0x72000000 + SIZEOF_HEADERS;
-  .interp         : { *(.interp) }
-  .note.gnu.build-id : { *(.note.gnu.build-id) }
-  .hash           : { *(.hash) }
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
-  .rel.dyn        :
-    {
-      *(.rel.init)
-      *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
-      *(.rel.fini)
-      *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
-      *(.rel.data.rel.ro* .rel.gnu.linkonce.d.rel.ro.*)
-      *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
-      *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
-      *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
-      *(.rel.ctors)
-      *(.rel.dtors)
-      *(.rel.got)
-      *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
-      *(.rel.ldata .rel.ldata.* .rel.gnu.linkonce.l.*)
-      *(.rel.lbss .rel.lbss.* .rel.gnu.linkonce.lb.*)
-      *(.rel.lrodata .rel.lrodata.* .rel.gnu.linkonce.lr.*)
-    }
-  .rela.dyn       :
-    {
-      *(.rela.init)
-      *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
-      *(.rela.fini)
-      *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
-      *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
-      *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
-      *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
-      *(.rela.ctors)
-      *(.rela.dtors)
-      *(.rela.got)
-      *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
-      *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
-      *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
-      *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
-    }
-  .rel.plt        : { *(.rel.plt) }
-  .rela.plt       : { *(.rela.plt) }
-  .init           :
-  {
-    KEEP (*(.init))
-  } =0x90909090
-  .plt            : { *(.plt) }
-  .text           :
-  {
-    *(.text .stub .text.* .gnu.linkonce.t.*)
-    KEEP (*(.text.*personality*))
-    /* .gnu.warning sections are handled specially by elf32.em.  */
-    *(.gnu.warning)
-  } =0x90909090
-  .fini           :
-  {
-    KEEP (*(.fini))
-  } =0x90909090
-  PROVIDE (__etext = .);
-  PROVIDE (_etext = .);
-  PROVIDE (etext = .);
-  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-  .rodata1        : { *(.rodata1) }
-  .eh_frame_hdr : { *(.eh_frame_hdr) }
-  .eh_frame       : ONLY_IF_RO { KEEP (*(.eh_frame)) }
-  .gcc_except_table   : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
-  /* Adjust the address for the data segment.  We want to adjust up to
-     the same address within the page on the next page up.  */
-  . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
-  /* Exception handling  */
-  .eh_frame       : ONLY_IF_RW { KEEP (*(.eh_frame)) }
-  .gcc_except_table   : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
-  /* Thread Local Storage sections  */
-  .tdata	  : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
-  .tbss		  : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
-  .preinit_array     :
-  {
-    PROVIDE_HIDDEN (__preinit_array_start = .);
-    KEEP (*(.preinit_array))
-    PROVIDE_HIDDEN (__preinit_array_end = .);
-  }
-  .init_array     :
-  {
-     PROVIDE_HIDDEN (__init_array_start = .);
-     KEEP (*(SORT(.init_array.*)))
-     KEEP (*(.init_array))
-     PROVIDE_HIDDEN (__init_array_end = .);
-  }
-  .fini_array     :
-  {
-    PROVIDE_HIDDEN (__fini_array_start = .);
-    KEEP (*(.fini_array))
-    KEEP (*(SORT(.fini_array.*)))
-    PROVIDE_HIDDEN (__fini_array_end = .);
-  }
-  .ctors          :
-  {
-    /* gcc uses crtbegin.o to find the start of
-       the constructors, so we make sure it is
-       first.  Because this is a wildcard, it
-       doesn't matter if the user does not
-       actually link against crtbegin.o; the
-       linker won't look for a file to match a
-       wildcard.  The wildcard also means that it
-       doesn't matter which directory crtbegin.o
-       is in.  */
-    KEEP (*crtbegin.o(.ctors))
-    KEEP (*crtbegin?.o(.ctors))
-    /* We don't want to include the .ctor section from
-       the crtend.o file until after the sorted ctors.
-       The .ctor section from the crtend file contains the
-       end of ctors marker and it must be last */
-    KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
-    KEEP (*(SORT(.ctors.*)))
-    KEEP (*(.ctors))
-  }
-  .dtors          :
-  {
-    KEEP (*crtbegin.o(.dtors))
-    KEEP (*crtbegin?.o(.dtors))
-    KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
-    KEEP (*(SORT(.dtors.*)))
-    KEEP (*(.dtors))
-  }
-  .jcr            : { KEEP (*(.jcr)) }
-  .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro* .gnu.linkonce.d.rel.ro.*) }
-  .dynamic        : { *(.dynamic) }
-  .got            : { *(.got) }
-  . = DATA_SEGMENT_RELRO_END (24, .);
-  .got.plt        : { *(.got.plt) }
-  .data           :
-  {
-    *(.data .data.* .gnu.linkonce.d.*)
-    KEEP (*(.gnu.linkonce.d.*personality*))
-    SORT(CONSTRUCTORS)
-  }
-  .data1          : { *(.data1) }
-  _edata = .; PROVIDE (edata = .);
-  __bss_start = .;
-  .bss            :
-  {
-   *(.dynbss)
-   *(.bss .bss.* .gnu.linkonce.b.*)
-   *(COMMON)
-   /* Align here to ensure that the .bss section occupies space up to
-      _end.  Align after .bss to ensure correct alignment even if the
-      .bss section disappears because there are no input sections.
-      FIXME: Why do we need it? When there is no .bss section, we don't
-      pad the .data section.  */
-   . = ALIGN(. != 0 ? 64 / 8 : 1);
-  }
-  .lbss   :
-  {
-    *(.dynlbss)
-    *(.lbss .lbss.* .gnu.linkonce.lb.*)
-    *(LARGE_COMMON)
-  }
-  . = ALIGN(64 / 8);
-  .lrodata   ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
-  {
-    *(.lrodata .lrodata.* .gnu.linkonce.lr.*)
-  }
-  .ldata   ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
-  {
-    *(.ldata .ldata.* .gnu.linkonce.l.*)
-    . = ALIGN(. != 0 ? 64 / 8 : 1);
-  }
-  . = ALIGN(64 / 8);
-  _end = .; PROVIDE (end = .);
-  . = DATA_SEGMENT_END (.);
-  /* Stabs debugging sections.  */
-  .stab          0 : { *(.stab) }
-  .stabstr       0 : { *(.stabstr) }
-  .stab.excl     0 : { *(.stab.excl) }
-  .stab.exclstr  0 : { *(.stab.exclstr) }
-  .stab.index    0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment       0 : { *(.comment) }
-  /* DWARF debug sections.
-     Symbols in the DWARF debugging sections are relative to the beginning
-     of the section so we begin them at 0.  */
-  /* DWARF 1 */
-  .debug          0 : { *(.debug) }
-  .line           0 : { *(.line) }
-  /* GNU DWARF 1 extensions */
-  .debug_srcinfo  0 : { *(.debug_srcinfo) }
-  .debug_sfnames  0 : { *(.debug_sfnames) }
-  /* DWARF 1.1 and DWARF 2 */
-  .debug_aranges  0 : { *(.debug_aranges) }
-  .debug_pubnames 0 : { *(.debug_pubnames) }
-  /* DWARF 2 */
-  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
-  .debug_abbrev   0 : { *(.debug_abbrev) }
-  .debug_line     0 : { *(.debug_line) }
-  .debug_frame    0 : { *(.debug_frame) }
-  .debug_str      0 : { *(.debug_str) }
-  .debug_loc      0 : { *(.debug_loc) }
-  .debug_macinfo  0 : { *(.debug_macinfo) }
-  /* SGI/MIPS DWARF 2 extensions */
-  .debug_weaknames 0 : { *(.debug_weaknames) }
-  .debug_funcnames 0 : { *(.debug_funcnames) }
-  .debug_typenames 0 : { *(.debug_typenames) }
-  .debug_varnames  0 : { *(.debug_varnames) }
-  /* DWARF 3 */
-  .debug_pubtypes 0 : { *(.debug_pubtypes) }
-  .debug_ranges   0 : { *(.debug_ranges) }
-  .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-  /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) }
-}
\ No newline at end of file
diff --git a/tsan/get_and_build_tsan.sh b/tsan/get_and_build_tsan.sh
deleted file mode 100755
index 1ed13b7..0000000
--- a/tsan/get_and_build_tsan.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-
-# Where to install Valgrind with ThreadSanitizer.
-VALGRIND_INST_ROOT="$1"
-SVN_ROOT="$2"
-
-if [ "$VALGRIND_INST_ROOT" == "" ]; then
-  echo "Usage: $0 /tsan/installation/path [svn/root/dir]"
-  exit
-fi
-
-if [ "$SVN_ROOT" == "" ]; then
-# Get ThreadSanitizer. This will create directory 'drt'
-  svn co http://data-race-test.googlecode.com/svn/trunk drt || exit 1
-  cd drt || exit 1
-else
-  cd $SVN_ROOT || exit 1
-fi
-
-TOPDIR=`pwd`
-
-VG_ARCH=$(uname -m | sed -e "s/i.86/x86/;s/x86_64/amd64/;s/arm.*/arm/")
-
-# Translate OS to valgrind-style identifiers
-OS=`uname -s`
-if [ "$OS" == "Linux" ]; then
-  VG_OS="linux"
-elif [ "$OS" == "Darwin" ]; then
-  VG_OS="darwin"
-fi
-
-if ! echo -n "$OS $VG_ARCH" | \
-     grep "\(Linux \(amd64\|x86\)\)\|Darwin x86" >/dev/null
-then
-  echo "ThreadSanitizer is not yet supported on $OS $VG_ARCH"
-  exit 1
-fi
-
-echo ------------------------------------------------
-echo Building ThreadSanitizer for $OS $VG_ARCH
-echo ------------------------------------------------
-sleep 1
-
-# Build Valgind.
-cd $TOPDIR/third_party || exit 1
-./update_valgrind.sh || exit 1
-./build_and_install_valgrind.sh $VALGRIND_INST_ROOT || exit 1
-
-cd $TOPDIR/tsan || exit 1
-make -s -j4 OFFLINE= GTEST_ROOT= PIN_ROOT= VALGRIND_INST_ROOT=$VALGRIND_INST_ROOT || exit 1
-# Build the self contained binaries.
-make self-contained OS=$VG_OS ARCH=$VG_ARCH VALGRIND_INST_ROOT=$VALGRIND_INST_ROOT || exit 1
-
-TSAN=$TOPDIR/tsan/bin/tsan-$VG_ARCH-$VG_OS-self-contained.sh
-
-# Test
-cd $TOPDIR/unittest || exit 1
-make all -s -j4 OS=${VG_OS} ARCH=${VG_ARCH} OPT=1 STATIC=0 || exit 1
-$TSAN --color bin/demo_tests-${VG_OS}-${VG_ARCH}-O1 --gtest_filter="DemoTests.RaceReportDemoTest" || exit 1
-
-# Done
-echo "ThreadSanitizer is built: $TSAN"
diff --git a/tsan/ignore.cc b/tsan/ignore.cc
deleted file mode 100644
index 7d38066..0000000
--- a/tsan/ignore.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "common_util.h"
-
-#include "ignore.h"
-
-IgnoreLists *g_ignore_lists;
-vector<string>* g_ignore_obj;
-IgnoreLists *g_white_lists;
-
-static void SplitStringIntoLinesAndRemoveBlanksAndComments(
-    const string &str, vector<string> *lines) {
-  string cur_line;
-  bool in_comment = false;
-  for (size_t pos = 0; pos < str.size(); pos++) {
-    char ch = str[pos];
-    if (ch == '\n') {
-      if (!cur_line.empty()) {
-        // Printf("++ %s\n", cur_line.c_str());
-        lines->push_back(cur_line);
-      }
-      cur_line.clear();
-      in_comment = false;
-      continue;
-    }
-    if (ch == ' ' || ch == '\t') continue;
-    if (ch == '#') {
-      in_comment = true;
-      continue;
-    }
-    if (!in_comment) {
-      cur_line += ch;
-    }
-  }
-}
-
-static bool CutStringPrefixIfPresent(const string &input, const string &prefix,
-                     /* OUT */ string *output) {
-  if (input.find(prefix) == 0) {
-    *output = input.substr(prefix.size());
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static bool ReadIgnoreLine(string input_line, IgnoreLists *ignore_lists) {
-  string tail;
-  if (CutStringPrefixIfPresent(input_line, "obj:", &tail)) {
-    ignore_lists->ignores.push_back(IgnoreObj(tail));
-  } else if (CutStringPrefixIfPresent(input_line, "src:", &tail)) {
-    ignore_lists->ignores.push_back(IgnoreFile(tail));
-  } else if (CutStringPrefixIfPresent(input_line, "fun:", &tail)) {
-    ignore_lists->ignores.push_back(IgnoreFun(tail));
-  } else if (CutStringPrefixIfPresent(input_line, "fun_r:", &tail)) {
-    ignore_lists->ignores_r.push_back(IgnoreFun(tail));
-  } else if (CutStringPrefixIfPresent(input_line, "fun_hist:", &tail)) {
-    ignore_lists->ignores_hist.push_back(IgnoreFun(tail));
-  } else {
-    return false;
-  }
-  return true;
-}
-
-void ReadIgnoresFromString(const string& ignoreString, IgnoreLists *ignore_lists) {
-  vector<string> lines;
-  SplitStringIntoLinesAndRemoveBlanksAndComments(ignoreString, &lines);
-  for (size_t j = 0; j < lines.size(); j++) {
-    string &line = lines[j];
-    bool line_parsed = ReadIgnoreLine(line, ignore_lists);
-    if (!line_parsed) {
-      Printf("Error reading ignore file line:\n%s\n", line.c_str());
-      CHECK(0);
-    }
-  }
-}
-
-// True iff there exists a triple each of which components is either empty
-// or matches the corresponding string.
-bool TripleVectorMatchKnown(const vector<IgnoreTriple>& v,
-                       const string& fun,
-                       const string& obj,
-                       const string& file) {
-  for (size_t i = 0; i < v.size(); i++) {
-    if ((fun.size() == 0 || StringMatch(v[i].fun, fun)) &&
-        (obj.size() == 0 || StringMatch(v[i].obj, obj)) &&
-        (file.size() == 0 || StringMatch(v[i].file, file))) {
-      if ((fun.size() == 0 || v[i].fun == "*") &&
-          (obj.size() == 0 || v[i].obj == "*") &&
-          (file.size() == 0 || v[i].file == "*")) {
-        // At least one of the matched features should be either non-empty
-        // or match a non-trivial pattern.
-        // For example, a <*, *, filename.ext> triple should NOT match
-        // fun="fun", obj="obj.o", file="".
-        continue;
-      } else {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-bool StringVectorMatch(const vector<string>& v, const string& obj) {
-  for (size_t i = 0; i < v.size(); i++)
-    if (StringMatch(v[i], obj))
-      return true;
-  return false;
-}
diff --git a/tsan/ignore.h b/tsan/ignore.h
deleted file mode 100644
index ae4daa0..0000000
--- a/tsan/ignore.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef TSAN_IGNORE_H__
-#define TSAN_IGNORE_H__
-
-#include "common_util.h"
-
-// A triple of patterns to ignore a function, an object file and a source file
-// by their names.
-struct IgnoreTriple {
-  string fun;
-  string obj;
-  string file;
-
-  IgnoreTriple(string ifun, string iobj, string ifile) : fun(ifun) {
-    obj = ConvertToPlatformIndependentPath(iobj);
-    file = ConvertToPlatformIndependentPath(ifile);
-    CHECK(!((ifun == "*") && (iobj == "*") && (ifile == "*")));
-  }
-};
-
-struct IgnoreObj : public IgnoreTriple {
-  IgnoreObj(string obj) : IgnoreTriple("*", obj, "*") {}
-};
-
-struct IgnoreFun : public IgnoreTriple {
-  IgnoreFun(string fun) : IgnoreTriple(fun, "*", "*") {}
-};
-
-struct IgnoreFile : public IgnoreTriple {
-  IgnoreFile(string file) : IgnoreTriple("*", "*", file) {}
-};
-
-struct IgnoreLists {
-  vector<IgnoreTriple> ignores;
-  vector<IgnoreTriple> ignores_r;
-  vector<IgnoreTriple> ignores_hist;
-};
-
-extern IgnoreLists *g_ignore_lists;
-extern vector<string> *g_ignore_obj;
-
-extern IgnoreLists *g_white_lists;
-
-void ReadIgnoresFromString(const string& ignoreString,
-    IgnoreLists* ignoreLists);
-
-bool TripleVectorMatchKnown(const vector<IgnoreTriple>& v,
-    const string& fun,
-    const string& obj,
-    const string& file);
-
-bool StringVectorMatch(const vector<string>& v, const string& obj);
-
-#endif
diff --git a/tsan/license_for_windows.txt b/tsan/license_for_windows.txt
deleted file mode 100644
index 91b08b3..0000000
--- a/tsan/license_for_windows.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-ThreadSanitizer for Windows depends on two packages from third parties.
-1. PIN, available from http://www.pintool.org/downloads.html.
-2. dbghelp.dll version 6.11.1.404, available from
-   http://msdl.microsoft.com/download/symbols/debuggers/dbg_x86_6.11.1.404.msi
-
-For more details see http://code.google.com/p/data-race-test/wiki/ThreadSanitizerForWindows
-
-Below are the licenses for PIN and dbghelp.dll:
-
------------------------ PIN ------------------------------------------
-IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.  By
-downloading, copying, installing or using the software you agree to
-this license.  If you do not agree to this license, do not download,
-install, copy or use the software.
- 
-Intel Open Source License
-  
-Copyright (c) 2003 Intel Corporation
-All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-   
-Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.  Redistributions
-in binary form must reproduce the above copyright notice, this list of
-conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution.  Neither the name of
-the Intel Corporation nor the names of its contributors may be used to
-endorse or promote products derived from this software without
-specific prior written permission.
-     
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-'AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
-ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
------------------------ dbghelp.dll -----------------------------------
-MICROSOFT SOFTWARE LICENSE TERMS
-MICROSOFT DEBUGGING TOOLS FOR WINDOWS 
-These license terms are an agreement between Microsoft Corporation (or based on where you live, one of its affiliates) and you.  Please read them.  They apply to the software named above, which includes the media on which you received it, if any.  The terms also apply to any Microsoft
-·	updates,
-·	supplements,
-·	Internet-based services 
-·	support services, and
-·	Debugging symbol files that you may access over the internet 
-for this software, unless other terms accompany those items.  If so, those terms apply.
-By using the software, you accept these terms.  If you do not accept them, do not use the software.
-If you comply with these license terms, you have the rights below.
-1.	INSTALLATION AND USE RIGHTS.  One user may install and use any number of copies of the software on your devices to design, develop, debug and test your programs.
-2.	ADDITIONAL LICENSING REQUIREMENTS AND/OR USE RIGHTS.
-a.	Distributable Code.  The software contains code that you are permitted to distribute in programs you develop if you comply with the terms below.
-i.	Right to Use and Distribute.  The code and text files listed below are “Distributable Code.”
-·	REDIST.TXT Files.  You may copy and distribute the object code form of code listed in REDIST.TXT files.
-·	Sample Code.  You may modify, copy, and distribute the source and object code form of code marked as “sample.”
-·	Third Party Distribution.  You may permit distributors of your programs to copy and distribute the Distributable Code as part of those programs.
-ii.	Distribution Requirements.  For any Distributable Code you distribute, you must
-·	add significant primary functionality to it in your programs;
-·	require distributors and external end users to agree to terms that protect it at least as much as this agreement; 
-·	display your valid copyright notice on your programs; and
-·	indemnify, defend, and hold harmless Microsoft from any claims, including attorneys’ fees, related to the distribution or use of your programs.
-iii.	Distribution Restrictions.  You may not
-·	alter any copyright, trademark or patent notice in the Distributable Code; 
-·	distribute any symbol files which you may access or use under these license terms for the software;
-·	use Microsoft’s trademarks in your programs’ names or in a way that suggests your programs come from or are endorsed by Microsoft; 
-·	distribute Distributable Code to run on a platform other than the Windows platform;
-·	include Distributable Code in malicious, deceptive or unlawful programs; or
-·	modify or distribute the source code of any Distributable Code so that any part of it becomes subject to an Excluded License.  An Excluded License is one that requires, as a condition of use, modification or distribution, that
-·	the code be disclosed or distributed in source code form; or 
-·	others have the right to modify it.
-3.	Scope of License.  The software is licensed, not sold. This agreement only gives you some rights to use the software.  Microsoft reserves all other rights.  Unless applicable law gives you more rights despite this limitation, you may use the software only as expressly permitted in this agreement.  In doing so, you must comply with any technical limitations in the software that only allow you to use it in certain ways.    You may not
-·	work around any technical limitations in the software;
-·	reverse engineer, decompile or disassemble the software, except and only to the extent that applicable law expressly permits, despite this limitation;
-·	make more copies of the software than specified in this agreement or allowed by applicable law, despite this limitation;
-·	publish the software for others to copy;
-·	rent, lease or lend the software;
-·	transfer the software or this agreement to any third party; or
-·	use the software for commercial software hosting services.   
-4.	INTERNET-BASED SERVICES.  Microsoft provides Internet-based services with the software.  It may change or cancel them at any time.
-a.	Consent for Internet-Based Services.  The software contains features which may connect to Microsoft or service provider computer systems over the Internet.  In some cases, you will not receive a separate notice when they connect.  You may switch these features on or you may choose not to use them.  For more information about these features, see http://www.microsoft.com/info/privacy/default.mspx.  By using these features, you consent to the transmission of this information.  Microsoft does not use the information to identify or contact you.
-b.	Misuse of Internet-based Services.  You may not use these services in any way that could harm them or impair anyone else’s use of them.  You may not use the services to try to gain unauthorized access to any service, data, account or network by any means.     
-  
-5.	BACKUP COPY.  You may make one backup copy of the software.  You may use it only to reinstall the software.
-6.	DOCUMENTATION.  Any person that has valid access to your computer or internal network may copy and use the documentation for your internal, reference purposes.
-7.	Export Restrictions.  The software is subject to United States export laws and regulations.  You must comply with all domestic and international export laws and regulations that apply to the software.  These laws include restrictions on destinations, end users and end use.  For additional information, see www.microsoft.com/exporting <http://www.microsoft.com/exporting>.
-8.	SUPPORT SERVICES. Because this software is “as is,” we may not provide support services for it.
-9.	Entire Agreement.  This agreement, and the terms for supplements, updates, Internet-based services and support services that you use, are the entire agreement for the software and support services.
-10.	Applicable Law.
-a.	United States.  If you acquired the software in the United States, Washington state law governs the interpretation of this agreement and applies to claims for breach of it, regardless of conflict of laws principles.  The laws of the state where you live govern all other claims, including claims under state consumer protection laws, unfair competition laws, and in tort.
-b.	Outside the United States.  If you acquired the software in any other country, the laws of that country apply.
-11.	Legal Effect.  This agreement describes certain legal rights.  You may have other rights under the laws of your country.  You may also have rights with respect to the party from whom you acquired the software.  This agreement does not change your rights under the laws of your country if the laws of your country do not permit it to do so.
-12.	Disclaimer of Warranty.   The software is licensed “as-is.”  You bear the risk of using it.  Microsoft gives no express warranties, guarantees or conditions.  You may have additional consumer rights under your local laws which this agreement cannot change.  To the extent permitted under your local laws, Microsoft excludes the implied warranties of merchantability, fitness for a particular purpose and non-infringement.
-13.	Limitation on and Exclusion of Remedies and Damages.   You can recover from Microsoft and its suppliers only direct damages up to U.S. $5.00.  You cannot recover any other damages, including consequential, lost profits, special, indirect or incidental damages.
-This limitation applies to
-·	anything related to the software, services, content (including code) on third party Internet sites, or third party programs; and
-·	claims for breach of contract, breach of warranty, guarantee or condition, strict liability, negligence, or other tort to the extent permitted by applicable law.
-It also applies even if Microsoft knew or should have known about the possibility of the damages.  The above limitation or exclusion may not apply to you because your country may not allow the exclusion or limitation of incidental, consequential or other damages.
-Please note: As this software is distributed in Quebec, Canada, some of the clauses in this agreement are provided below in French.
-Remarque : Ce logiciel étant distribué au Québec, Canada, certaines des clauses dans ce contrat sont fournies ci-dessous en français.
-EXONÉRATION DE GARANTIE. Le logiciel visé par une licence est offert « tel quel ». Toute utilisation de ce logiciel est à votre seule risque et péril. Microsoft n’accorde aucune autre garantie expresse. Vous pouvez bénéficier de droits additionnels en vertu du droit local sur la protection des consommateurs, que ce contrat ne peut modifier. La ou elles sont permises par le droit locale, les garanties implicites de qualité marchande, d’adéquation à un usage particulier et d’absence de contrefaçon sont exclues.
-LIMITATION DES DOMMAGES-INTÉRÊTS ET EXCLUSION DE RESPONSABILITÉ POUR LES DOMMAGES.  Vous pouvez obtenir de Microsoft et de ses fournisseurs une indemnisation en cas de dommages directs uniquement à hauteur de 5,00 $ US. Vous ne pouvez prétendre à aucune indemnisation pour les autres dommages, y compris les dommages spéciaux, indirects ou accessoires et pertes de bénéfices.
-Cette limitation concerne :
-·	tout  ce qui est relié au logiciel, aux services ou au contenu (y compris le code) figurant sur des sites Internet tiers ou dans des programmes tiers ; et
-·	les réclamations au titre de violation de contrat ou de garantie, ou au titre de responsabilité stricte, de négligence ou d’une autre faute dans la limite autorisée par la loi en vigueur.
-Elle s’applique également, même si Microsoft connaissait ou devrait connaître l’éventualité d’un tel dommage.  Si votre pays n’autorise pas l’exclusion ou la limitation de responsabilité pour les dommages indirects, accessoires ou de quelque nature que ce soit, il se peut que la limitation ou l’exclusion ci-dessus ne s’appliquera pas à votre égard.
-EFFET JURIDIQUE.  Le présent contrat décrit certains droits juridiques. Vous pourriez avoir d’autres droits prévus par les lois de votre pays.  Le présent contrat ne modifie pas les droits que vous confèrent les lois de votre pays si celles-ci ne le permettent pas.
diff --git a/tsan/mk-self-contained-tsan-pin.sh b/tsan/mk-self-contained-tsan-pin.sh
deleted file mode 100755
index f0203bf..0000000
--- a/tsan/mk-self-contained-tsan-pin.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash
-# This scripts builds a self-contained executable file for ThreadSanitizer.
-# Usage:
-#   ./mk-self-contained-tsan.sh            \
-#      /pin/root                           \
-#      /dir/where/tsan-pin-files/reside    \
-#      resulting_binary
-
-# take Pin from here:
-PIN_ROOT="$1"
-# Our .so files are here:
-IN_DIR="$2"
-# Put the result here:
-OUT="$3"
-# The files/dirs to take:
-IN_FILES="tsan_pin.sh bin/*ts_pin.so"
-
-rm -rf $OUT           # remove the old one
-touch  $OUT           # create the new one
-chmod +x $OUT
-
-# Create the header.
-cat << 'EOF' >> $OUT
-#!/bin/bash
-# This is a self-extracting executable of ThreadSanitizerPin.
-# This file is autogenerated by mk-self-contained-tsan-pin.sh.
-
-# We extract the temporary files to $TSAN_EXTRACT_DIR/tsan_pin.XXXXXX
-TSAN_EXTRACT_DIR=${TSAN_EXTRACT_DIR:-/tmp}
-EXTRACT_DIR="$(mktemp -d $TSAN_EXTRACT_DIR/tsan_pin.XXXXXX)"
-
-cleanup() {
-  rm -rf $EXTRACT_DIR
-}
-# We will cleanup on exit.
-trap cleanup EXIT
-
-mkdir -p $EXTRACT_DIR
-chmod +rwx $EXTRACT_DIR
-EOF
-# end of header
-
-# Create the self-extractor
-
-# Exclude unneeded binaries.
-TAR_EXCLUDE="$TAR_EXCLUDE --exclude=*/doc/*       \
-                          --exclude=*/include/*   \
-                          --exclude=*/examples/*   \
-                          "
-# Create the running part.
-
-cat << 'EOF' >> $OUT
-# Extract:
-echo Extracting ThreadSanitizerPin to $EXTRACT_DIR
-sed '1,/^__COMPRESSED_DATA_BELOW__$/d' $0 | tar xz -C $EXTRACT_DIR
-
-export PIN_ROOT=$EXTRACT_DIR
-export TS_ROOT=$EXTRACT_DIR
-$EXTRACT_DIR/tsan_pin.sh "$@"
-EXIT_STATUS=$?
-cleanup # the trap above will handle the cleanup only if we are in bash 3.x
-exit $EXIT_STATUS # make sure to return the exit code from the tool.
-
-__COMPRESSED_DATA_BELOW__
-EOF
-
-# Dump the compressed binary at the very end of the file.
-echo tar zcvh -C $IN_DIR $TAR_EXCLUDE $IN_FILES
-tar zcvh -C $IN_DIR $TAR_EXCLUDE $IN_FILES -C $PIN_ROOT ./{extras,ia32,intel64,pin}  >> $OUT
-
-echo "File $OUT successfully created"
-
diff --git a/tsan/mk-self-contained-valgrind.sh b/tsan/mk-self-contained-valgrind.sh
deleted file mode 100755
index ac6d8da..0000000
--- a/tsan/mk-self-contained-valgrind.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-# This scripts builds a self-contained executable file for Valgrind.
-# Usage:
-#   ./mk-self-contained-valgrind.sh /path/to/valgrind/installation tool_name resulting_binary [tool_flag]
-
-# Take the valgrind installation from here:
-IN_DIR="$1"
-# Tool name:
-TOOL="$2"
-# Put the result here:
-OUT="$3"
-# If not empty, use as the --tool= value:
-if [ "$4" == "" ]
-then
-  TOOLFLAG=$TOOL
-else
-  TOOLFLAG="$4"
-fi
-
-# The files/dirs to take:
-IN_FILES="bin/valgrind lib/valgrind/vgpreload_core* lib/valgrind/*$TOOL* lib/valgrind/default.supp"
-EXCLUDE_FILES="lib/valgrind/*$TOOL-debug*"
-
-rm -f $OUT && touch $OUT && chmod +x $OUT || exit 1
-
-# Create the header.
-cat << 'EOF' >> $OUT || exit 1
-#!/bin/bash
-# This is a self-extracting executable of Valgrind.
-# This file is autogenerated by mk-self-contained-valgrind.sh.
-
-# We extract the temporary files to $VALGRIND_EXTRACT_DIR/valgrind.XXXXXX
-VALGRIND_EXTRACT_DIR=${VALGRIND_EXTRACT_DIR:-/tmp}
-EXTRACT_DIR="$(mktemp -d $VALGRIND_EXTRACT_DIR/valgrind.XXXXXX)"
-
-cleanup() {
-  rm -rf $EXTRACT_DIR
-}
-# We will cleanup on exit.
-trap cleanup EXIT
-
-mkdir -p $EXTRACT_DIR
-chmod +rwx $EXTRACT_DIR
-EOF
-# end of header
-
-# Create the self-extractor
-
-# Create the runner
-cat << 'EOF' >> $OUT || exit 1
-# Extract:
-sed '1,/^__COMPRESSED_DATA_BELOW__$/d' $0 | tar xz -C $EXTRACT_DIR
-
-# Run
-# echo Extracting Valgrind to $EXTRACT_DIR
-export VALGRIND_LIB="$EXTRACT_DIR/lib/valgrind"
-export VALGRIND_LIB_INNER="$EXTRACT_DIR/lib/valgrind"
-EOF
-
-echo "\$EXTRACT_DIR/bin/valgrind --tool=$TOOLFLAG \"\$@\"" >> $OUT || exit 1
-
-cat << 'EOF' >> $OUT || exit 1
-EXIT_STATUS=$?
-cleanup # the trap above will handle the cleanup only if we are in bash 3.x
-exit $EXIT_STATUS # make sure to return the exit code from valgrind.
-
-__COMPRESSED_DATA_BELOW__
-EOF
-
-# Dump the compressed binary at the very end of the file.
-(cd $IN_DIR && tar zcvh --exclude=$EXCLUDE_FILES $IN_FILES) >> $OUT || exit 1
-
-echo "File $OUT successfully created"
diff --git a/tsan/offline_tests/301.tst.gz b/tsan/offline_tests/301.tst.gz
deleted file mode 100644
index 408796c..0000000
--- a/tsan/offline_tests/301.tst.gz
+++ /dev/null
Binary files differ
diff --git a/tsan/offline_tests/311.tst.gz b/tsan/offline_tests/311.tst.gz
deleted file mode 100644
index 327ef52..0000000
--- a/tsan/offline_tests/311.tst.gz
+++ /dev/null
Binary files differ
diff --git a/tsan/offline_tests/README b/tsan/offline_tests/README
deleted file mode 100644
index 2d648b3..0000000
--- a/tsan/offline_tests/README
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory contains tests for ThreadSanitizerOffline.
-Experimental. See ts_offline.cc for details.
diff --git a/tsan/offline_tests/simple_race_1.tst b/tsan/offline_tests/simple_race_1.tst
deleted file mode 100644
index 889a069..0000000
--- a/tsan/offline_tests/simple_race_1.tst
+++ /dev/null
@@ -1,43 +0,0 @@
-# Start thread T0.
-THR_START 0 0 0 0
-
-
-# Create two locks.
-LOCK_CREATE 0 ff0 7777 0
-LOCK_CREATE 0 ff1 7778 0
-
-
-# Start thread T1
-THR_START 1 0 0 0
-
-# Call few functions in T0
-RTN_CALL 0 ca000001 ca000002 0
-RTN_CALL 0 ca000002 ca000003 0
-
-# Call few functions in T1
-RTN_CALL 1 ca100001 ca100002 0
-RTN_CALL 1 ca100002 ca100003 0
-
-# Allocate 0xff bytes of memory in T0
-MALLOC 0 cdeffedc abcd0 ff
-
-# Malloc some more (unrelated)
-MALLOC 0 cdeffedc ccc ff
-MALLOC 0 cdeffedc cccccccc ff
-
-# Acquire lock 7777 in T0
-WRITER_LOCK 0 aa 7777 0
-
-# Write to 0xabcde in T0
-SBLOCK_ENTER 0 ca000003 0 0
-WRITE 0 aa008001 abcde 1
-
-# Acquire reader lock 7778 in T1
-READER_LOCK 1 bb 7778 0
-
-##############
-# Race here: #
-##############
-#
-# Read 0xabcde in T1
-READ 1 aa108001 abcde 1
diff --git a/tsan/pin/Makefile b/tsan/pin/Makefile
deleted file mode 100644
index 021a86e..0000000
--- a/tsan/pin/Makefile
+++ /dev/null
@@ -1,114 +0,0 @@
-
-#PIN_ROOT is taken from env var.
-PIN_BIN=${PIN_ROOT}/pin
-
-INLINE=
-OPTLEVEL=-O3
-CXXFLAGS_L=$(OPTLEVEL) $(INLINE) -Wall -Werror -Wno-unknown-pragmas -g -fno-omit-frame-pointer -fno-strict-aliasing -fPIC
-CXXFLAGS_L64=-DBIGARRAY_MULTIPLIER=1 -DUSING_XED  -DTARGET_IA32E -DHOST_IA32E  -DTARGET_LINUX
-CXXFLAGS_L32=-DBIGARRAY_MULTIPLIER=1 -DUSING_XED  -DTARGET_IA32  -DHOST_IA32   -DTARGET_LINUX
-INCLUDES_L64=-I..  -I$(PIN_ROOT)/extras/xed2-intel64/include -I$(PIN_ROOT)/source/include -I$(PIN_ROOT)/source/include/gen
-INCLUDES_L32=-I..  -I$(PIN_ROOT)/extras/xed2-ia32/include -I$(PIN_ROOT)/source/include -I$(PIN_ROOT)/source/include/gen
-LIBPATHS_L64=-L$(PIN_ROOT)/extras/xed2-intel64/lib -L$(PIN_ROOT)/intel64/lib -L$(PIN_ROOT)/intel64/lib-ext
-LIBPATHS_L32=-L$(PIN_ROOT)/extras/xed2-ia32/lib -L$(PIN_ROOT)/ia32/lib -L$(PIN_ROOT)/ia32/lib-ext
-LDFLAGS_L=-g -shared -Wl,-Bsymbolic -Wl,--version-script=$(PIN_ROOT)/source/include/pintool.ver
-LIBS_L= -lpin  -lxed -ldwarf -lelf -ldl -lpthread
-
-
-ifeq ($(OS), l)
-  SO=so
-  OBJ=o
-  CXX=g++
-  LD=g++
-  LIBS=$(LIBS_L)
-  ifeq ($(B), 64)
-    CXXFLAGS=$(CXXFLAGS_L) $(CXXFLAGS_L64) $(INCLUDES_L64) -m64
-    LDFLAGS=$(LDFLAGS_L) $(LIBPATHS_L64) -m64
-  else
-    CXXFLAGS=$(CXXFLAGS_L) $(CXXFLAGS_L32) $(INCLUDES_L32) -m32
-    LDFLAGS=$(LDFLAGS_L) $(LIBPATHS_L32) -m32
-  endif
-else ifeq ($(OS), w)
-  SO=dll
-  OBJ=obj
-  CXX=cl
-  LD=link
-  CXXFLAGS=/c /MT /EHs- /EHa- /wd4530  /DTARGET_WINDOWS /DBIGARRAY_MULTIPLIER=1 /DUSING_XED /D_CRT_SECURE_NO_DEPRECATE /D_SECURE_SCL=0 /nologo /Gy /O2 /DTARGET_IA32 /DHOST_IA32 \
-	   /I.. /I$(PIN_ROOT)/source/include /I$(PIN_ROOT)/source/include/gen   /I$(PIN_ROOT)/extras/xed2-ia32/include
-  LDFLAGS=/DLL /EXPORT:main /NODEFAULTLIB  /NOLOGO /INCREMENTAL:NO  /OPT:REF  /MACHINE:x86 /ENTRY:Ptrace_DllMainCRTStartup@12 /BASE:0x55000000    \
-	  /LIBPATH:$(PIN_ROOT)/ia32/lib /LIBPATH:$(PIN_ROOT)/ia32/lib-ext  /LIBPATH:$(PIN_ROOT)/extras/xed2-ia32/lib
-  LIBS=pin.lib libxed.lib libcpmt.lib libcmt.lib pinvm.lib kernel32.lib ntdll-32.lib
-else
-
-endif
-
-ifeq ($(DEBUG), 1)
-  BUILD_SUFIX=_deb
-  DEFINES=-DDEBUG=1
-else
-  BUILD_SUFIX=_opt
-  DEFINES=-DINCLUDE_THREAD_SANITIZER_CC=1
-endif
-
-
-SUFIX=_${OS}$(B)$(BUILD_SUFIX)
-
-ALL_OBJECTS=ts_pin$(SUFIX).$(OBJ) ts_util$(SUFIX).$(OBJ) thread_sanitizer$(SUFIX).$(OBJ)
-
-all:
-
-pintool: ts_pin$(SUFIX).$(SO)
-
-l: l32 l64
-l64: l64o l64d
-l32: l32o l32d
-ld: l64d l32d
-lo: l64o l32o
-
-l64d:
-	$(MAKE) pintool OS=l B=64  DEBUG=1
-l64o:
-	$(MAKE) pintool OS=l B=64  DEBUG=0
-
-l32d:
-	$(MAKE) pintool OS=l B=32  DEBUG=1
-l32o:
-	$(MAKE) pintool OS=l B=32  DEBUG=0
-
-w32d:
-	$(MAKE) pintool OS=w B=32  DEBUG=1
-
-
-
-
-ts_pin$(SUFIX).so: $(ALL_OBJECTS)
-	$(LD) $(LDFLAGS) $(LIBPATHS) -o $@ $^  $(LIBS)
-
-ts_pin$(SUFIX).dll: $(ALL_OBJECTS)
-	$(LD) $(LDFLAGS) $(LIBPATHS)  /IMPLIB:ts_pin$(SUFIX).lib /PDB:ts_pin$(SUFIX).pdb /OUT:$@  $^  $(LIBS)
-
-HEADERS=../thread_sanitizer.h ../ts_util.h
-
-
-%$(SUFIX).o: %.cc $(HEADERS)
-	$(CXX) $(CXXFLAGS) -o $@ -c $< $(DEFINES)
-%$(SUFIX).o: ../%.cc $(HEADERS)
-	$(CXX) $(CXXFLAGS) -o $@ -c $< $(DEFINES)
-
-%$(SUFIX).obj: %.cc $(HEADERS)
-	$(CXX) $(CXXFLAGS) /Fo$@ -c $< $(DEFINES)
-%$(SUFIX).obj: ../%.cc $(HEADERS)
-	$(CXX) $(CXXFLAGS) /Fo$@ -c $< $(DEFINES)
-
-
-#all_tests: simple_pin_test.so detach_example.so
-#%.so: %.$(OBJ)
-#	$(CXX) $(LDFLAGS) $(LIBPATHS) -o $@ $<  $(LIBS)
-#
-#
-win_test.exe: win_test.cpp
-	cl /Zi $<
-
-
-clean:
-	rm -fv *.so *.o *.obj *.dll core* pintool.log* pin.log *.exp *.lib *.pdb *.ilk  *.exe
diff --git a/tsan/pin/README.txt b/tsan/pin/README.txt
deleted file mode 100644
index b7622b2..0000000
--- a/tsan/pin/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-So far this is experimental. Don't expect anything to work here.
diff --git a/tsan/pin/simple_pin_test.cc b/tsan/pin/simple_pin_test.cc
deleted file mode 100644
index 60ecd3d..0000000
--- a/tsan/pin/simple_pin_test.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-// Simple test for PIN.
-// Prints the number of memory accesses.
-// Run: $PIN_ROOT/pin -t `pwd`/simple_pin_test.so -- your_program
-#include "pin.H"
-
-#include <map>
-
-// statistics
-static long long dynamic_memory_access_count;
-static int static_memory_access_count;
-
-//---------- Instrumentation functions ---------
-void InsertBeforeEvent_MemoryAccess(ADDRINT pc) {
-  dynamic_memory_access_count++;
-}
-
-//-------------- PIN callbacks ---------------
-void CallbackForTRACE(TRACE trace, void *v) {
-  for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
-    for (INS ins = BBL_InsHead(bbl); INS_Valid(ins); ins = INS_Next(ins)) {
-      if (INS_IsStackRead(ins) || INS_IsStackWrite(ins))
-        continue;
-      if (INS_IsMemoryRead(ins) || INS_IsMemoryWrite(ins)) {
-        static_memory_access_count++;
-        INS_InsertCall(ins, IPOINT_BEFORE,
-                         (AFUNPTR)InsertBeforeEvent_MemoryAccess,
-                         IARG_INST_PTR, IARG_END);
-      }
-    }
-  }
-}
-
-static void CallbackForFini(INT32 code, void *v) {
-  printf("accesses static  : %d\n", static_memory_access_count);
-  printf("accesses dynamic : %lld\n", dynamic_memory_access_count);
-}
-
-//---------------- main ---------------
-int main(INT32 argc, CHAR **argv) {
-  PIN_Init(argc, argv);
-  PIN_InitSymbols();
-  PIN_AddFiniFunction(CallbackForFini, 0);
-  TRACE_AddInstrumentFunction(CallbackForTRACE, 0);
-  PIN_StartProgram();
-  printf("accesses static  : %d\n", static_memory_access_count);
-  return 0;
-}
diff --git a/tsan/suppressions.cc b/tsan/suppressions.cc
deleted file mode 100644
index 3845963..0000000
--- a/tsan/suppressions.cc
+++ /dev/null
@@ -1,433 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Evgeniy Stepanov.
-
-// This file contains the parser for valgrind-compatible suppressions.
-
-#include "suppressions.h"
-
-// TODO(eugenis): convert checks to warning messages.
-// TODO(eugenis): write tests for incorrect syntax.
-
-enum LocationType {
-  LT_STAR,  // ...
-  LT_OBJ,  // obj:
-  LT_FUN,  // fun:
-};
-
-struct Location {
-  LocationType type;
-  string name;
-};
-
-struct StackTraceTemplate {
-  vector<Location> locations;
-};
-
-struct Suppression {
-  string name;
-  set<string> tools;
-  string warning_name;
-  // Extra information available for some suppression types.
-  // ex.: Memcheck:Param
-  string extra;
-  vector<StackTraceTemplate> templates;
-};
-
-class Parser {
- public:
-  explicit Parser(const string &str)
-      : buffer_(str), next_(buffer_.c_str()),
-        end_(buffer_.c_str() + buffer_.size()), line_no_(0), error_(false) {}
-
-  bool NextSuppression(Suppression* suppression);
-  bool GetError();
-  string GetErrorString();
-  int GetLineNo();
-
- private:
-  bool Eof() { return next_ >= end_; }
-  string NextLine();
-  string NextLineSkipComments();
-  void PutBackSkipComments(string line);
-  bool ParseSuppressionToolsLine(Suppression* supp, string line);
-  bool IsExtraLine(string line);
-  bool ParseStackTraceLine(StackTraceTemplate* trace, string line);
-  bool NextStackTraceTemplate(StackTraceTemplate* trace, bool* last);
-
-  void SetError(string desc);
-
-  const string& buffer_;
-  const char* next_;
-  const char* end_;
-  stack<string> put_back_stack_;
-
-  int line_no_;
-  bool error_;
-  string error_string_;
-};
-
-#define PARSER_CHECK(cond, desc) do {\
-    if (!(cond)) {\
-      SetError(desc);\
-      return false;\
-    }} while ((void)0, 0)
-
-void Parser::SetError(string desc) {
-  error_ = true;
-  error_string_ = desc;
-}
-
-bool Parser::GetError() {
-  return error_;
-}
-
-string Parser::GetErrorString() {
-  return error_string_;
-}
-
-int Parser::GetLineNo() {
-  return line_no_;
-}
-
-string Parser::NextLine() {
-  const char* first = next_;
-  while (!Eof() && *next_ != '\n') {
-    ++next_;
-  }
-  string line(first, next_ - first);
-  if (*next_ == '\n') {
-    ++next_;
-  }
-  ++line_no_;
-  return line;
-}
-
-string Parser::NextLineSkipComments() {
-  string line;
-  if (!put_back_stack_.empty()) {
-    line = put_back_stack_.top();
-    put_back_stack_.pop();
-    return line;
-  }
-  while (!Eof()) {
-    line = NextLine();
-    // Skip empty lines.
-    if (line.empty())
-      continue;
-    // Skip comments.
-    if (line[0] == '#')
-      continue;
-    const char* p = line.c_str();
-    const char* e = p + line.size();
-    // Strip whitespace.
-    while (p < e && (*p == ' ' || *p == '\t'))
-      ++p;
-    if (p >= e)
-      continue;
-    const char* last = e - 1;
-    while (last > p && (*last == ' ' || *last == '\t'))
-      --last;
-    return string(p, last - p + 1);
-  }
-  return "";
-}
-
-void Parser::PutBackSkipComments(string line) {
-  put_back_stack_.push(line);
-}
-
-bool Parser::ParseSuppressionToolsLine(Suppression* supp, string line) {
-  size_t idx = line.find(':');
-  PARSER_CHECK(idx != string::npos, "expected ':' in tools line");
-  string s1 = line.substr(0, idx);
-  string s2 = line.substr(idx + 1);
-  PARSER_CHECK(!s1.empty(), "expected non-empty tool(s) name");
-  PARSER_CHECK(!s2.empty(), "expected non-empty warning name");
-  size_t idx2;
-  while ((idx2 = s1.find(',')) != string::npos) {
-    supp->tools.insert(s1.substr(0, idx2));
-    s1.erase(0, idx2 + 1);
-  }
-  supp->tools.insert(s1);
-  supp->warning_name = s2;
-  return true;
-}
-
-bool Parser::ParseStackTraceLine(StackTraceTemplate* trace, string line) {
-  if (line == "...") {
-    Location location = {LT_STAR, ""};
-    trace->locations.push_back(location);
-    return true;
-  } else {
-    size_t idx = line.find(':');
-    PARSER_CHECK(idx != string::npos, "expected ':' in stack trace line");
-    string s1 = line.substr(0, idx);
-    string s2 = line.substr(idx + 1);
-    if (s1 == "obj") {
-      Location location = {LT_OBJ, s2};
-      trace->locations.push_back(location);
-      return true;
-    } else if (s1 == "fun") {
-      Location location = {LT_FUN, s2};
-      // A suppression frame can only have ( or ) if it comes from Objective-C,
-      // i.e. starts with +[ or -[ or =[
-      PARSER_CHECK(s2.find_first_of("()") == string::npos ||
-                   (s2[1] == '[' && strchr("+-=", s2[0]) != NULL),
-                   "'fun:' lines can't contain '()'");
-
-      // Check that we don't have template arguments in the suppression.
-      {
-        // Caveat: don't be confused by "operator>>" and similar...
-        size_t checked_till = 0;
-        // List of possible >>-like operators, sorted by the operation length.
-        const char *OP[] = {">>=", "<<=",
-                            ">>", "<<",
-                            ">=", "<=",
-                            "->", "->*",
-                            "<", ">"};
-        bool check_failed = false;
-        while (!check_failed && checked_till < s2.size()) {
-          size_t next = s2.find_first_of("<>", checked_till);
-          if (next == string::npos)
-            break;
-
-          if (next < 8) {
-            // operatorX won't fit
-            check_failed = true;
-            break;
-          }
-
-          for (size_t i = 0; i < TS_ARRAY_SIZE(OP); i++) {
-            size_t op_offset = ((string)OP[i]).find(s2[next]);
-            if (op_offset == string::npos)
-              continue;
-            if (next >= 8 + op_offset &&
-                "operator" == s2.substr(next- (8 + op_offset), 8) &&
-                OP[i] == s2.substr(next- op_offset, strlen(OP[i]))) {
-              checked_till = next + strlen(OP[i] + op_offset);
-              break;
-            }
-          }
-        }
-
-        PARSER_CHECK(!check_failed, "'fun:' lines can't contain '<' or '>' "
-                     "except for operators");
-      }
-
-      trace->locations.push_back(location);
-      return true;
-    } else {
-      SetError("bad stack trace line");
-      return false;
-    }
-  }
-}
-
-// Checks if this line can not be parsed by Parser::NextStackTraceTemplate
-// and, therefore, is an extra information for the suppression.
-bool Parser::IsExtraLine(string line) {
-  if (line == "..." || line == "{" || line == "}")
-    return false;
-  if (line.size() < 4)
-    return true;
-  string prefix = line.substr(0, 4);
-  return !(prefix == "obj:" || prefix == "fun:");
-}
-
-bool Parser::NextStackTraceTemplate(StackTraceTemplate* trace,
-    bool* last_stack_trace) {
-  string line = NextLineSkipComments();
-  if (line == "}") {  // No more stack traces in multi-trace syntax
-    *last_stack_trace = true;
-    return false;
-  }
-
-  if (line == "{") {  // A multi-trace syntax
-    line = NextLineSkipComments();
-  } else {
-    *last_stack_trace = true;
-  }
-
-  while (true) {
-    if (!ParseStackTraceLine(trace, line))
-      return false;
-    line = NextLineSkipComments();
-    if (line == "}")
-      break;
-  }
-  return true;
-}
-
-bool Parser::NextSuppression(Suppression* supp) {
-  string line;
-  line = NextLineSkipComments();
-  if (line.empty())
-    return false;
-  // Opening {
-  PARSER_CHECK(line == "{", "expected '{'");
-  // Suppression name.
-  line = NextLineSkipComments();
-  PARSER_CHECK(!line.empty(), "expected suppression name");
-  supp->name = line;
-  // tool[,tool]:warning_name.
-  line = NextLineSkipComments();
-  PARSER_CHECK(!line.empty(), "expected tool[, tool]:warning_name line");
-  if (!ParseSuppressionToolsLine(supp, line))
-    return false;
-  if (0) {  // Not used currently. May still be needed later.
-    // A possible extra line.
-    line = NextLineSkipComments();
-    if (IsExtraLine(line))
-      supp->extra = line;
-    else
-      PutBackSkipComments(line);
-  }
-  // Everything else.
-  bool done = false;
-  while (!done) {
-    StackTraceTemplate trace;
-    if (NextStackTraceTemplate(&trace, &done))
-      supp->templates.push_back(trace);
-    if (error_)
-      return false;
-  }
-  // TODO(eugenis): Do we need to check for empty traces?
-  return true;
-}
-
-struct Suppressions::SuppressionsRep {
-  vector<Suppression> suppressions;
-  string error_string_;
-  int error_line_no_;
-};
-
-Suppressions::Suppressions() : rep_(new SuppressionsRep) {}
-
-Suppressions::~Suppressions() {
-  delete rep_;
-}
-
-int Suppressions::ReadFromString(const string &str) {
-  int sizeBefore = rep_->suppressions.size();
-  Parser parser(str);
-  Suppression supp;
-  while (parser.NextSuppression(&supp)) {
-    rep_->suppressions.push_back(supp);
-  }
-  if (parser.GetError()) {
-    rep_->error_string_ = parser.GetErrorString();
-    rep_->error_line_no_ = parser.GetLineNo();
-    return -1;
-  }
-  return rep_->suppressions.size() - sizeBefore;
-}
-
-string Suppressions::GetErrorString() {
-  return rep_->error_string_;
-}
-
-int Suppressions::GetErrorLineNo() {
-  return rep_->error_line_no_;
-}
-
-struct MatcherContext {
-  MatcherContext(
-      const vector<string>& function_names_mangled_,
-      const vector<string>& function_names_demangled_,
-      const vector<string>& object_names_) :
-      function_names_mangled(function_names_mangled_),
-      function_names_demangled(function_names_demangled_),
-      object_names(object_names_),
-      tmpl(NULL)
-  {}
-
-  const vector<string>& function_names_mangled;
-  const vector<string>& function_names_demangled;
-  const vector<string>& object_names;
-  StackTraceTemplate* tmpl;
-};
-
-static bool MatchStackTraceRecursive(MatcherContext ctx, int trace_index,
-    int tmpl_index) {
-  const int trace_size = ctx.function_names_mangled.size();
-  const int tmpl_size = ctx.tmpl->locations.size();
-  while (trace_index < trace_size && tmpl_index < tmpl_size) {
-    Location& location = ctx.tmpl->locations[tmpl_index];
-    if (location.type == LT_STAR) {
-      ++tmpl_index;
-      while (trace_index < trace_size) {
-        if (MatchStackTraceRecursive(ctx, trace_index++, tmpl_index))
-          return true;
-      }
-      return false;
-    } else {
-      bool match = false;
-      if (location.type == LT_OBJ) {
-        match = StringMatch(location.name, ctx.object_names[trace_index]);
-      } else {
-        CHECK(location.type == LT_FUN);
-        match =
-          StringMatch(location.name, ctx.function_names_mangled[trace_index]) ||
-          StringMatch(location.name, ctx.function_names_demangled[trace_index]);
-      }
-      if (match) {
-        ++trace_index;
-        ++tmpl_index;
-      } else {
-        return false;
-      }
-    }
-  }
-  return tmpl_index == tmpl_size;
-}
-
-bool Suppressions::StackTraceSuppressed(string tool_name, string warning_name,
-    const vector<string>& function_names_mangled,
-    const vector<string>& function_names_demangled,
-    const vector<string>& object_names,
-    string *name_of_suppression) {
-  MatcherContext ctx(function_names_mangled, function_names_demangled,
-      object_names);
-  for (vector<Suppression>::iterator it = rep_->suppressions.begin();
-       it != rep_->suppressions.end(); ++it) {
-    if (it->warning_name != warning_name ||
-        it->tools.find(tool_name) == it->tools.end())
-      continue;
-    for (vector<StackTraceTemplate>::iterator it2 = it->templates.begin();
-         it2 != it->templates.end(); ++it2) {
-      ctx.tmpl = &*it2;
-      bool result = MatchStackTraceRecursive(ctx, 0, 0);
-      if (result) {
-        *name_of_suppression = it->name;
-        return true;
-      }
-    }
-  }
-  return false;
-}
diff --git a/tsan/suppressions.h b/tsan/suppressions.h
deleted file mode 100644
index 52adeee..0000000
--- a/tsan/suppressions.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Evgeniy Stepanov.
-
-// This file contains the parser and matcher for valgrind-compatible
-// suppressions. It supports extended suppression syntax, see details at
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerSuppressions
-
-#ifndef TSAN_SUPPRESSIONS_H_
-#define TSAN_SUPPRESSIONS_H_
-
-#include "common_util.h"
-
-class Suppressions {
- public:
-  Suppressions();
-  ~Suppressions();
-
-  // Read suppressions file from string. May be called several times.
-  // Return the number of parsed suppressions or -1 if an error occured.
-  int ReadFromString(const string &str);
-
-  // Returns the string describing the last error. Undefined if there was no
-  // error.
-  string GetErrorString();
-
-  // Returns the line number of the last error. Undefined if there was no error.
-  int GetErrorLineNo();
-
-  // Checks if a given stack trace is suppressed.
-  bool StackTraceSuppressed(string tool_name, string warning_name,
-      const vector<string>& function_names_mangled,
-      const vector<string>& function_names_demangled,
-      const vector<string>& object_names,
-      string *name_of_suppression);
-
- private:
-  struct SuppressionsRep;
-  SuppressionsRep* rep_;
-};
-
-#endif  // TSAN_SUPPRESSIONS_H_
diff --git a/tsan/suppressions_test.cc b/tsan/suppressions_test.cc
deleted file mode 100644
index 717bf5d..0000000
--- a/tsan/suppressions_test.cc
+++ /dev/null
@@ -1,610 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Evgeniy Stepanov.
-
-// This file contains tests for suppressions implementation.
-
-#include <gtest/gtest.h>
-
-#include "suppressions.h"
-
-#define VEC(arr) *(new vector<string>(arr, arr + sizeof(arr) / sizeof(*arr)))
-
-class BaseSuppressionsTest : public ::testing::Test {
- protected:
-  bool IsSuppressed(string tool, string warning_type, const vector<string>& f_m,
-      const vector<string>& f_d, const vector<string>& o) {
-    string result;
-    return supp_.StackTraceSuppressed(
-        tool, warning_type, f_m, f_d, o, &result);
-  }
-
-  bool IsSuppressed(const vector<string>& f_m, const vector<string>& f_d,
-      const vector<string>& o) {
-    return IsSuppressed("test_tool", "test_warning_type", f_m, f_d, o);
-  }
-
-  Suppressions supp_;
-};
-
-class SuppressionsTest : public BaseSuppressionsTest {
- protected:
-  virtual void SetUp() {
-    const string data =
-        "{\n"
-        "  name\n"
-        "  test_tool,tool2:test_warning_type\n"
-        "  fun:function1\n"
-        "  obj:object1\n"
-        "  fun:function2\n"
-        "}";
-    supp_.ReadFromString(data);
-  }
-};
-
-
-TEST_F(SuppressionsTest, Simple) {
-  string m[] = {"aa", "bb", "cc"};
-  string d[] = {"aaa", "bbb", "ccc"};
-  string o[] = {"object1", "object2", "object3"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(SuppressionsTest, Simple2) {
-  string m[] = {"function1", "bb", "function2"};
-  string d[] = {"aaa", "bbb", "ccc"};
-  string o[] = {"object2", "object1", "object3"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-// A long stack trace is ok.
-TEST_F(SuppressionsTest, LongTrace) {
-  string m[] = {"function1", "bb", "function2", "zz"};
-  string d[] = {"aaa", "bbb", "ccc", "zzz"};
-  string o[] = {"object2", "object1", "object3", "o4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-// A stack trace template only matches at the top of the stack.
-TEST_F(SuppressionsTest, OnlyMatchesAtTheTop) {
-  string m[] = {"zz", "function1", "bb", "function2"};
-  string d[] = {"zzz", "aaa", "bbb", "ccc"};
-  string o[] = {"o0", "object2", "object1", "object3"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-// A short stack trace is not.
-TEST_F(SuppressionsTest, ShortTrace) {
-  string m[] = {"function1", "bb"};
-  string d[] = {"aaa", "bbb"};
-  string o[] = {"object2", "object1"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-class SuppressionsWithWildcardsTest : public BaseSuppressionsTest {
- protected:
-  virtual void SetUp() {
-    const string data =
-        "{\n"
-        "  name\n"
-        "  test_tool,tool2:test_warning_type\n"
-        "  fun:fun*1\n"
-        "  obj:obj*t1\n"
-        "  ...\n"
-        "  fun:f?n*2\n"
-        "}";
-    supp_.ReadFromString(data);
-  }
-};
-
-TEST_F(SuppressionsWithWildcardsTest, Wildcards1) {
-  string m[] = {"function1", "bb", "function2"};
-  string d[] = {"aaa", "bbb", "ccc"};
-  string o[] = {"object2", "object1", "object3"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(SuppressionsWithWildcardsTest, Wildcards2) {
-  string m[] = {"some_other_function1", "bb", "function2"};
-  string d[] = {"aaa", "bbb", "ccc"};
-  string o[] = {"object2", "object1", "object3"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(SuppressionsWithWildcardsTest, Wildcards3) {
-  string m[] = {"fun1", "bb", "fanction2"};
-  string d[] = {"aaa", "bbb", "ccc"};
-  string o[] = {"object2", "objt1", "object3"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-// Tests "..." wildcard.
-TEST_F(SuppressionsWithWildcardsTest, VerticalWildcards1) {
-  string m[] = {"fun1", "bb", "qq", "fanction2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-
-class MultipleStackTraceTest : public BaseSuppressionsTest {
- protected:
-  virtual void SetUp() {
-    const string data =
-        "{\n"
-        "  name\n"
-        "  test_tool,tool2:test_warning_type\n"
-        "  {\n"
-        "    fun:fun*1\n"
-        "  }\n"
-        "  {\n"
-        "    fun:fun*2\n"
-        "    fun:fun*3\n"
-        "  }\n"
-        "  {\n"
-        "    ...\n"
-        "    fun:fun*4\n"
-        "    obj:obj*5\n"
-        "  }\n"
-        "}";
-    supp_.ReadFromString(data);
-  }
-};
-
-TEST_F(MultipleStackTraceTest, Simple1) {
-  string m[] = {"fun1", "bb", "qq", "fun2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object1", "object2", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(MultipleStackTraceTest, SecondTemplateMatches) {
-  string m[] = {"fun2", "fun3", "qq", "fun2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object1", "object2", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(MultipleStackTraceTest, ThirdTemplateMatches) {
-  string m[] = {"fun4", "bb", "qq", "fun2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object1", "object5", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(MultipleStackTraceTest, NothingMatches) {
-  string m[] = {"_fun1", "bb", "qq", "fun2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object1", "object2", "object3", "object4"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(MultipleStackTraceTest, TwoTemplatesMatch) {
-  string m[] = {"fun1", "bb", "fun4", "fun2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object1", "object2", "object3", "object5"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-
-TEST_F(BaseSuppressionsTest, StartsWithVerticalWildcard) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  ...\n"
-      "  fun:qq\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, StartsWithVerticalWildcard2) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  ...\n"
-      "  fun:fun1\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, EndsWithVerticalWildcard) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:fun1\n"
-      "  ...\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, EndsWithVerticalWildcard2) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:qq\n"
-      "  ...\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, Complex) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:qq\n"
-      "  ...\n"
-      "  obj:obj*3\n"
-      "  ...\n"
-      "  fun:function?\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"aaa", "bbb", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_FALSE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, DemangledNames) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:bb*w?\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"bbbxxwz", "aaa", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, TrailingWhitespace) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:bb*w? \n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"fun1", "bb", "qq", "function2"};
-  string d[] = {"bbbxxwz", "aaa", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, ObjectiveC) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:-[NSObject(NSKeyValueCoding) setValue:forKeyPath:]\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"-[NSObject(NSKeyValueCoding) setValue:forKeyPath:]", "function2"};
-  string d[] = {"bbbxxwz", "aaa", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-TEST_F(BaseSuppressionsTest, ComparisonAndShiftOperators) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:operator<\n"
-      "  fun:operator>\n"
-      "  fun:operator<=\n"
-      "  fun:operator>=\n"
-      "  fun:operator<<\n"
-      "  fun:operator>>\n"
-      "  fun:operator<<=\n"
-      "  fun:operator>>=\n"
-      "  fun:operator->\n"
-      "  fun:operator->*\n"
-      "}";
-  ASSERT_GT(supp_.ReadFromString(data), 0);
-  string m[] = {"operator<", "operator>", "operator<=", "operator>=",
-                "operator<<", "operator>>", "operator<<=", "operator>>=",
-                "operator->", "operator->*"};
-  string d[] = {"bbbxxwz", "aaa", "ddd", "ccc"};
-  string o[] = {"object2", "objt1", "object3", "object4"};
-  ASSERT_TRUE(IsSuppressed(VEC(m), VEC(d), VEC(o)));
-}
-
-
-class FailingSuppressionsTest : public ::testing::Test {
- protected:
-  int ErrorLineNo(string data) {
-    int result = supp_.ReadFromString(data);
-    if (result >= 0)
-      return -1;
-    else
-      return supp_.GetErrorLineNo();
-  }
-
-  Suppressions supp_;
-};
-
-TEST_F(FailingSuppressionsTest, NoOpeningBrace) {
-  const string data =
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:bb*w? \n"
-      "}";
-  ASSERT_EQ(1, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, Bad1) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  something_else\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:bb*w? \n"
-      "}";
-  ASSERT_EQ(3, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, Bad2) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  extra\n"
-      "  fun:bb*w? \n"
-      "}";
-  ASSERT_EQ(4, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, Bad3) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:bb*w? \n"
-      "  extra\n"
-      "}";
-  ASSERT_EQ(5, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, SomeWeirdTextAfterASuppression) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  fun:bb*w? \n"
-      "}\n"
-      "some_weird_text\n"
-      "after_a_suppression\n";
-  ASSERT_EQ(6, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, NoToolsLineInMultitraceSuppression) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  {\n"
-      "    fun:fun*2\n"
-      "    fun:fun*3\n"
-      "  }\n"
-      "  {\n"
-      "    ...\n"
-      "    fun:fun*4\n"
-      "    obj:obj*5\n"
-      "  }\n"
-      "}";
-  ASSERT_EQ(3, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, BadStacktrace1) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  {\n"
-      "    fun:fun*2\n"
-      "    fun:fun*3\n"
-      "  }\n"
-      "  {\n"
-      "    zzz\n"
-      "    fun:fun*4\n"
-      "    obj:obj*5\n"
-      "  }\n"
-      "}";
-  ASSERT_EQ(9, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, BadStacktrace2) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  {\n"
-      "    fun:fun*2\n"
-      "    fun:fun*3\n"
-      "  }\n"
-      "  {\n"
-      "    {\n"
-      "    fun:fun*4\n"
-      "    obj:obj*5\n"
-      "  }\n"
-      "}";
-  ASSERT_EQ(9, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, BadStacktrace3) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  {\n"
-      "    fun:fun*2\n"
-      "    fun:fun*3\n"
-      "  }\n"
-      "  {\n"
-      "    fun:fun*4\n"
-      "    obj:obj*5\n"
-      "  }\n"
-      "  zzz\n"
-      "}";
-  ASSERT_EQ(12, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, StacktraceWithParenthesis) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  {\n"
-      "    fun:fun*2\n"
-      "    fun:fun*3\n"
-      "  }\n"
-      "  {\n"
-      "    fun:fun*4()\n"
-      "    obj:obj*5\n"
-      "  }\n"
-      "}";
-  ASSERT_EQ(9, ErrorLineNo(data));
-}
-
-TEST_F(FailingSuppressionsTest, StacktraceWithAngleBraces) {
-  const string data =
-      "{\n"
-      "  name\n"
-      "  test_tool:test_warning_type\n"
-      "  {\n"
-      "    fun:fun*2\n"
-      "    fun:fun*3\n"
-      "  }\n"
-      "  {\n"
-      "    fun:fun<int>*4\n"
-      "    obj:obj*5\n"
-      "  }\n"
-      "}";
-  ASSERT_EQ(9, ErrorLineNo(data));
-}
-
-
-TEST(WildcardTest, Simple) {
-  EXPECT_TRUE(StringMatch("abc", "abc"));
-  EXPECT_FALSE(StringMatch("abcd", "abc"));
-  EXPECT_FALSE(StringMatch("dabc", "abc"));
-  EXPECT_FALSE(StringMatch("ab", "abc"));
-  EXPECT_FALSE(StringMatch("", "abc"));
-  EXPECT_FALSE(StringMatch("abc", ""));
-  EXPECT_TRUE(StringMatch("", ""));
-}
-
-TEST(WildcardTest, SingleCharacterWildcard) {
-  EXPECT_TRUE(StringMatch("a?c", "abc"));
-  EXPECT_TRUE(StringMatch("?bc", "abc"));
-  EXPECT_TRUE(StringMatch("ab?", "abc"));
-  EXPECT_TRUE(StringMatch("a??", "abc"));
-  EXPECT_TRUE(StringMatch("???", "abc"));
-  EXPECT_TRUE(StringMatch("?", "a"));
-  EXPECT_FALSE(StringMatch("?zc", "abc"));
-  EXPECT_FALSE(StringMatch("?bz", "abc"));
-  EXPECT_FALSE(StringMatch("b?c", "abc"));
-  EXPECT_FALSE(StringMatch("az?", "abc"));
-  EXPECT_FALSE(StringMatch("abc?", "abc"));
-  EXPECT_FALSE(StringMatch("?abc", "abc"));
-  EXPECT_FALSE(StringMatch("?", ""));
-  EXPECT_FALSE(StringMatch("??", ""));
-}
-
-TEST(WildcardTest, MultiCharacterWildcard) {
-  EXPECT_TRUE(StringMatch("*x", "x"));
-  EXPECT_TRUE(StringMatch("x*", "x"));
-  EXPECT_TRUE(StringMatch("*x*", "x"));
-
-  EXPECT_TRUE(StringMatch("a*d", "abcd"));
-  EXPECT_TRUE(StringMatch("ab*d", "abcd"));
-  EXPECT_TRUE(StringMatch("*cd", "abcd"));
-  EXPECT_TRUE(StringMatch("*d", "abcd"));
-  EXPECT_TRUE(StringMatch("ab*", "abcd"));
-  EXPECT_TRUE(StringMatch("a*", "abcd"));
-  EXPECT_TRUE(StringMatch("*", "abcd"));
-  EXPECT_TRUE(StringMatch("ab*cd", "abcd"));
-
-  EXPECT_TRUE(StringMatch("ab**", "abcd"));
-  EXPECT_TRUE(StringMatch("**", "abcd"));
-  EXPECT_TRUE(StringMatch("***", "abcd"));
-  EXPECT_TRUE(StringMatch("**d", "abcd"));
-  EXPECT_TRUE(StringMatch("*c*", "abcd"));
-  EXPECT_TRUE(StringMatch("a*c*d*f", "abcdef"));
-  EXPECT_TRUE(StringMatch("a*c*e*", "abcdef"));
-  EXPECT_TRUE(StringMatch("*a*b*f", "abcdef"));
-  EXPECT_TRUE(StringMatch("*b*d*", "abcdef"));
-
-  EXPECT_FALSE(StringMatch("b*", "abcd"));
-  EXPECT_FALSE(StringMatch("*c", "abcd"));
-  EXPECT_FALSE(StringMatch("*a", "abcd"));
-}
-
-TEST(WildcardTest, WildcardCharactersInText) {
-  EXPECT_TRUE(StringMatch("?", "?"));
-  EXPECT_FALSE(StringMatch("a", "?"));
-  EXPECT_FALSE(StringMatch("ab", "a?"));
-  EXPECT_FALSE(StringMatch("ab", "?b"));
-  EXPECT_TRUE(StringMatch("a?", "a?"));
-  EXPECT_TRUE(StringMatch("?b", "?b"));
-
-  EXPECT_TRUE(StringMatch("*", "*"));
-  EXPECT_FALSE(StringMatch("a", "*"));
-  EXPECT_FALSE(StringMatch("ab", "a*"));
-  EXPECT_FALSE(StringMatch("ab", "*b"));
-  EXPECT_TRUE(StringMatch("a*", "a*"));
-  EXPECT_TRUE(StringMatch("*b", "*b"));
-}
-
-int main(int argc, char **argv) {
-  testing::InitGoogleTest(&argc, argv);
-
-  return RUN_ALL_TESTS();
-}
diff --git a/tsan/thread_sanitizer.cc b/tsan/thread_sanitizer.cc
deleted file mode 100644
index e118710..0000000
--- a/tsan/thread_sanitizer.cc
+++ /dev/null
@@ -1,9013 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-
-// You can find the details on this tool at
-// http://code.google.com/p/data-race-test
-
-#include "thread_sanitizer.h"
-#include "common_util.h"
-#include "suppressions.h"
-#include "ignore.h"
-#include "ts_lock.h"
-#include "ts_atomic_int.h"
-#include "dense_multimap.h"
-#include <stdarg.h>
-// -------- Constants --------------- {{{1
-// Segment ID (SID)      is in range [1, kMaxSID-1]
-// Segment Set ID (SSID) is in range [-kMaxSID+1, -1]
-// This is not a compile-time constant, but it can only be changed at startup.
-int kMaxSID = (1 << 23);
-// Flush state after so many SIDs have been allocated. Set by command line flag.
-int kMaxSIDBeforeFlush;
-
-// Lock ID (LID)      is in range [1, kMaxLID-1]
-// Lock Set ID (LSID) is in range [-kMaxLID+1, -1]
-const int kMaxLID = (1 << 23);
-
-// This is not a compile-time constant, but it can be changed only at startup.
-int kSizeOfHistoryStackTrace = 10;
-
-// Maximal number of segments in a SegmentSet.
-// If you change this constant, you also need to change several places
-// in SegmentSet code.
-const int kMaxSegmentSetSize = 4;
-
-// -------- Globals --------------- {{{1
-
-// If true, ignore all accesses in all threads.
-bool global_ignore;
-
-bool g_so_far_only_one_thread = false;
-bool g_has_entered_main = false;
-bool g_has_exited_main = false;
-
-size_t g_last_flush_time;
-
-// Incremented on each Lock and Unlock. Used by LockHistory.
-uint32_t g_lock_era = 0;
-
-uintptr_t g_nacl_mem_start = (uintptr_t)-1;
-uintptr_t g_nacl_mem_end = (uintptr_t)-1;
-
-bool g_race_verifier_active = false;
-
-bool debug_expected_races = false;
-bool debug_benign_races = false;
-bool debug_malloc = false;
-bool debug_free = false;
-bool debug_thread = false;
-bool debug_ignore = false;
-bool debug_rtn = false;
-bool debug_lock = false;
-bool debug_wrap = false;
-bool debug_ins = false;
-bool debug_shadow_stack = false;
-bool debug_happens_before = false;
-bool debug_cache = false;
-bool debug_race_verifier = false;
-bool debug_atomic = false;
-
-#define PrintfIf(flag, ...) \
-  do { if ((flag)) Printf(__VA_ARGS__); } while ((void)0, 0)
-
-// -------- TIL --------------- {{{1
-// ThreadSanitizer Internal lock (scoped).
-class TIL {
- public:
-  TIL(TSLock *lock, int lock_site, bool need_locking = true) :
-    lock_(lock),
-    need_locking_(need_locking) {
-    DCHECK(lock_);
-    if (need_locking_ && (TS_SERIALIZED == 0)) {
-      lock_->Lock();
-      G_stats->lock_sites[lock_site]++;
-    }
-  }
-  ~TIL() {
-    if (need_locking_ && (TS_SERIALIZED == 0))
-      lock_->Unlock();
-  }
- private:
-  TSLock *lock_;
-  bool need_locking_;
-};
-
-static TSLock *ts_lock;
-static TSLock *ts_ignore_below_lock;
-
-#ifdef TS_LLVM
-void ThreadSanitizerLockAcquire() {
-  ts_lock->Lock();
-}
-
-void ThreadSanitizerLockRelease() {
-  ts_lock->Unlock();
-}
-#endif
-
-static INLINE void AssertTILHeld() {
-  if (TS_SERIALIZED == 0 && DEBUG_MODE) {
-    ts_lock->AssertHeld();
-  }
-}
-
-// -------- Util ----------------------------- {{{1
-
-// Can't use ANNOTATE_UNPROTECTED_READ, it may get instrumented.
-template <class T>
-inline T INTERNAL_ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
-  ANNOTATE_IGNORE_READS_BEGIN();
-  T res = x;
-  ANNOTATE_IGNORE_READS_END();
-  return res;
-}
-
-static string RemoveFilePrefix(string str) {
-  for (size_t i = 0; i < G_flags->file_prefix_to_cut.size(); i++) {
-    string prefix_to_cut = G_flags->file_prefix_to_cut[i];
-    size_t pos = str.find(prefix_to_cut);
-    if (pos != string::npos) {
-      str = str.substr(pos + prefix_to_cut.size());
-    }
-  }
-  if (str.find("./") == 0) {  // remove leading ./
-    str = str.substr(2);
-  }
-  return str;
-}
-
-string PcToRtnNameAndFilePos(uintptr_t pc) {
-  G_stats->pc_to_strings++;
-  string img_name;
-  string file_name;
-  string rtn_name;
-  int line_no = -1;
-  PcToStrings(pc, G_flags->demangle, &img_name, &rtn_name,
-              &file_name, &line_no);
-  if (G_flags->demangle && !G_flags->full_stack_frames)
-    rtn_name = NormalizeFunctionName(rtn_name);
-  file_name = RemoveFilePrefix(file_name);
-  if (file_name == "") {
-    return rtn_name + " " + RemoveFilePrefix(img_name);
-  }
-  char buff[10];
-  snprintf(buff, sizeof(buff), "%d", line_no);
-  return rtn_name + " " + file_name + ":" + buff;
-}
-
-// -------- ID ---------------------- {{{1
-// We wrap int32_t into ID class and then inherit various ID type from ID.
-// This is done in an attempt to implement type safety of IDs, i.e.
-// to make it impossible to make implicit cast from one ID type to another.
-class ID {
- public:
-  typedef int32_t T;
-  explicit ID(T id) : id_(id) {}
-  ID(const ID &id) : id_(id.id_) {}
-  INLINE bool operator ==  (const ID &id) const { return id_ == id.id_; }
-  bool operator !=  (const ID &id) const { return id_ != id.id_; }
-  bool operator <  (const ID &id) const { return id_ < id.id_; }
-  bool operator >  (const ID &id) const { return id_ > id.id_; }
-  bool operator >=  (const ID &id) const { return id_ >= id.id_; }
-  bool operator <=  (const ID &id) const { return id_ <= id.id_; }
-
-  bool IsValid() const { return id_ >= 0; }
-
-  const ID &operator = (const ID &id) {
-    this->id_ = id.id_;
-    return *this;
-  }
-  T raw() const { return id_; }
-
- private:
-  T id_;
-};
-
-// Thread ID.
-// id >= 0
-class TID: public ID {
- public:
-  static const int32_t kInvalidTID;
-
-  explicit TID(T id) : ID(id) {}
-  TID() : ID(kInvalidTID) {}
-  bool valid() const { return raw() >= 0; }
-};
-
-const int32_t TID::kInvalidTID = -1;
-
-// Segment ID.
-// id > 0 && id < kMaxSID
-class SID: public ID {
- public:
-  explicit SID(T id) : ID(id) {}
-  SID() : ID(0) {}
-  bool valid() const { return raw() > 0 && raw() < kMaxSID; }
-};
-
-// Lock ID.
-// id > 0 && id < kMaxLID
-class LID: public ID {
- public:
-  explicit LID(T id) : ID(id) {}
-  LID() : ID(0) {}
-  bool valid() const { return raw() > 0 && raw() < kMaxLID; }
-};
-
-// LockSet ID.
-// Empty lockset: id == 0
-// Singleton:     id > 0 (id == Lock's id)
-// Tuple:         id < 0
-class LSID: public ID {
- public:
-  explicit LSID(T id) : ID(id) {}
-  LSID() : ID(INT_MAX) {}
-  bool valid() const {
-    return raw() < kMaxLID && raw() > -(kMaxLID);
-  }
-  bool IsEmpty() const { return raw() == 0; }
-  bool IsSingleton() const { return raw() > 0; }
-  LID GetSingleton() const { return LID(raw()); }
-};
-
-// SegmentSet ID.
-// Empty SegmentSet: id == 0
-// Singleton:        id > 0 (id == Segment's id)
-// Tuple:            id < 0
-class SSID: public ID {
- public:
-  explicit SSID(T id) : ID(id) {}
-  explicit SSID(SID sid) : ID(sid.raw()) {}
-  SSID(): ID(INT_MAX) {}
-  bool valid() const {
-    return raw() != 0 && raw() < kMaxSID && raw() > -kMaxSID;
-  }
-  bool IsValidOrEmpty() { return raw() < kMaxSID && raw() > -kMaxSID; }
-  bool IsEmpty() const { return raw() == 0; }
-  bool IsSingleton() const {return raw() > 0; }
-  bool IsTuple() const {return raw() < 0; }
-  SID  GetSingleton() const {
-    DCHECK(IsSingleton());
-    return SID(raw());
-  }
-  // TODO(timurrrr): need to start SegmentSetArray indices from 1
-  // to avoid "int ???() { return -raw() - 1; }"
-};
-
-// -------- Colors ----------------------------- {{{1
-// Colors for ansi terminals and for html.
-const char *c_bold    = "";
-const char *c_red     = "";
-const char *c_green   = "";
-const char *c_magenta = "";
-const char *c_cyan    = "";
-const char *c_blue    = "";
-const char *c_yellow  = "";
-const char *c_default = "";
-
-
-// -------- Forward decls ------ {{{1
-static void ForgetAllStateAndStartOver(TSanThread *thr, const char *reason);
-static void FlushStateIfOutOfSegments(TSanThread *thr);
-static int32_t raw_tid(TSanThread *t);
-// -------- Simple Cache ------ {{{1
-#include "ts_simple_cache.h"
-// -------- PairCache & IntPairToIntCache ------ {{{1
-// Small lossy cache mapping a key pair (A, B) to a value Ret.
-// It has two levels:
-//   * array_  - a ring buffer of recently inserted pairs, scanned linearly;
-//   * htable_ - a table of kHtableSize slots indexed by a hash of the pair,
-//               where a new entry overwrites whatever was in its slot.
-// Either level is disabled by setting its size to 0.
-template <typename A, typename B, typename Ret,
-         int kHtableSize, int kArraySize = 8>
-class PairCache {
- public:
-  PairCache() {
-    CHECK(kHtableSize >= 0);
-    CHECK(sizeof(Entry) == sizeof(A) + sizeof(B) + sizeof(Ret));
-    Flush();
-  }
-
-  // Drops every cached entry.
-  void Flush() {
-    memset(this, 0, sizeof(*this));
-
-    // Change the first hashtable entry so it doesn't match (0,0) on Lookup.
-    if (kHtableSize != 0)
-      memset(&htable_[0], 1, sizeof(Entry));
-
-    // Any Lookup should fail now.
-    for (int i = 0; i < kHtableSize; i++) {
-      Ret tmp;
-      DCHECK(!Lookup(htable_[i].a, htable_[i].b, &tmp));
-    }
-    CHECK(array_pos_    == 0);
-    CHECK(array_filled_ == false);
-  }
-
-  // Remembers that (a, b) maps to v.
-  void Insert(A a, B b, Ret v) {
-    // fill the hash table
-    if (kHtableSize != 0) {
-      uint32_t idx  = compute_idx(a, b);
-      htable_[idx].Fill(a, b, v);
-    }
-
-    // fill the array (only if the pair is not already there)
-    Ret dummy;
-    if (kArraySize != 0 && !ArrayLookup(a, b, &dummy)) {
-      array_[array_pos_ % kArraySize].Fill(a, b, v);
-      array_pos_ = (array_pos_ + 1) % kArraySize;
-      // array_pos_ is always kept below kArraySize, so the ring is full
-      // exactly when the position wraps back to 0, i.e. the last slot has
-      // just been written. From then on ArrayLookup must scan every slot.
-      if (array_pos_ == 0)
-        array_filled_ = true;
-    }
-  }
-
-  // If (a, b) is cached, stores its value in *v and returns true.
-  // Otherwise returns false and leaves *v untouched.
-  INLINE bool Lookup(A a, B b, Ret *v) {
-    // check the array
-    if (kArraySize != 0 && ArrayLookup(a, b, v)) {
-      G_stats->ls_cache_fast++;
-      return true;
-    }
-    // check the hash table.
-    if (kHtableSize != 0) {
-      uint32_t idx  = compute_idx(a, b);
-      Entry & prev_e = htable_[idx];
-      if (prev_e.Match(a, b)) {
-        *v = prev_e.v;
-        return true;
-      }
-    }
-    return false;
-  }
-
- private:
-  // One cached (a, b) -> v association.
-  struct Entry {
-    A a;
-    B b;
-    Ret v;
-    void Fill(A a, B b, Ret v) {
-      this->a = a;
-      this->b = b;
-      this->v = v;
-    }
-    bool Match(A a, B b) const {
-      return this->a == a && this->b == b;
-    }
-  };
-
-  // Linear scan over the valid part of array_.
-  INLINE bool ArrayLookup(A a, B b, Ret *v) {
-    for (int i = 0; i < (array_filled_ ? kArraySize : array_pos_); i++) {
-      Entry & entry = array_[i];
-      if (entry.Match(a, b)) {
-        *v = entry.v;
-        return true;
-      }
-    }
-    return false;
-  }
-
-  // Slot of (a, b) in htable_.
-  uint32_t compute_idx(A a, B b) {
-    if (kHtableSize == 0)
-      return 0;
-    else
-      return combine2(a, b) % kHtableSize;
-  }
-
-  // Mixes two ints into one hash value. The operands are converted to
-  // unsigned first: left-shifting a negative signed int is undefined.
-  static uint32_t combine2(int a, int b) {
-    return (static_cast<uint32_t>(a) << 16) ^ static_cast<uint32_t>(b);
-  }
-
-  static uint32_t combine2(SSID a, SID b) {
-    return combine2(a.raw(), b.raw());
-  }
-
-  Entry htable_[kHtableSize];
-
-  Entry array_[kArraySize];
-
-  // array_pos_    - next element to write to the array_ (mod kArraySize)
-  // array_filled_ - set to true once we write the last element of the array
-  int array_pos_;
-  bool array_filled_;
-};
-
-// PairCache specialization with int keys and an int value.
-template<int kHtableSize, int kArraySize = 8>
-class IntPairToIntCache
-  : public PairCache<int, int, int, kHtableSize, kArraySize> {};
-
-
-
-// -------- FreeList --------------- {{{1
-// Allocator for objects of one fixed size.
-// Memory is requested in chunks of chunk_size objects; free objects are
-// chained through a List link stored inside the object itself. Chunks are
-// never handed back to the system.
-class FreeList {
- public:
-  // obj_size   - size of one object in bytes; must be a non-zero multiple of
-  //              the pointer size because a free object stores a List link.
-  // chunk_size - number of objects obtained per chunk; must be >= 1.
-  FreeList(int obj_size, int chunk_size)
-    : list_(0),
-      obj_size_(obj_size),
-      chunk_size_(chunk_size) {
-    // sizeof(List) is the size of the in-place link. sizeof(NULL) is not a
-    // reliable substitute: NULL may be a plain int constant.
-    CHECK_GE(obj_size_, static_cast<int>(sizeof(List)));
-    CHECK((obj_size_ % sizeof(List)) == 0);
-    CHECK_GE(chunk_size_, 1);
-  }
-
-  // Returns memory for one object, allocating a new chunk if none is free.
-  void *Allocate() {
-    if (!list_)
-      AllocateNewChunk();
-    CHECK(list_);
-    List *head = list_;
-    list_ = list_->next;
-    return reinterpret_cast<void*>(head);
-  }
-
-  // Puts an object previously returned by Allocate() back on the free list.
-  void Deallocate(void *ptr) {
-    if (DEBUG_MODE) {
-      // Poison the object so that use-after-free is easier to spot.
-      memset(ptr, 0xac, obj_size_);
-    }
-    List *new_head = reinterpret_cast<List*>(ptr);
-    new_head->next = list_;
-    list_ = new_head;
-  }
-
- private:
-  // Allocates chunk_size_ objects and pushes all of them on the free list.
-  void AllocateNewChunk() {
-    CHECK(list_ == NULL);
-    // Compute byte counts in size_t so that the product cannot overflow int.
-    const size_t obj_bytes = static_cast<size_t>(obj_size_);
-    const size_t chunk_bytes = obj_bytes * static_cast<size_t>(chunk_size_);
-    uint8_t *new_mem = new uint8_t[chunk_bytes];
-    if (DEBUG_MODE) {
-      memset(new_mem, 0xab, chunk_bytes);
-    }
-    for (int i = 0; i < chunk_size_; i++) {
-      List *new_head = reinterpret_cast<List*>(new_mem + obj_bytes * i);
-      new_head->next = list_;
-      list_ = new_head;
-    }
-  }
-  // Link stored in the first bytes of every free object.
-  struct List {
-    struct List *next;
-  };
-  List *list_;
-
-
-  const int obj_size_;
-  const int chunk_size_;
-};
-// -------- StackTrace -------------- {{{1
-// Set of FreeLists used as backing storage for StackTrace objects:
-// one list per stack trace capacity in [1, G_flags->num_callers].
-class StackTraceFreeList {
- public:
-  StackTraceFreeList() {
-    size_t n_lists = G_flags->num_callers + 1;
-    free_lists_ = new FreeList *[n_lists];
-    // Slot 0 is left without a list.
-    free_lists_[0] = NULL;
-    size_t cap = 1;
-    while (cap < n_lists) {
-      // 'cap' words for the PCs plus two more words per object
-      // (presumably StackTrace's size_ and capacity_ fields).
-      free_lists_[cap] = new FreeList((cap+2) * sizeof(uintptr_t), 1024);
-      cap++;
-    }
-  }
-
-  // Returns raw memory for a stack trace holding up to 'capacity' PCs.
-  uintptr_t *GetNewMemForStackTrace(size_t capacity) {
-    DCHECK(capacity <= (size_t)G_flags->num_callers);
-    FreeList *pool = free_lists_[capacity];
-    void *raw = pool->Allocate();
-    return reinterpret_cast<uintptr_t*>(raw);
-  }
-
-  // Gives back memory obtained from GetNewMemForStackTrace(capacity).
-  void TakeStackTraceBack(uintptr_t *mem, size_t capacity) {
-    DCHECK(capacity <= (size_t)G_flags->num_callers);
-    FreeList *pool = free_lists_[capacity];
-    pool->Deallocate(mem);
-  }
-
- private:
-  // Array of G_flags->num_callers + 1 lists, indexed by capacity.
-  FreeList **free_lists_;
-};
-
-static StackTraceFreeList *g_stack_trace_free_list;
-
-// A stack trace: a header (size_, capacity_) immediately followed by an
-// array of PCs. Objects live in memory taken from g_stack_trace_free_list
-// and must be created with CreateNewEmptyStackTrace() and released with
-// Delete(); the constructor and destructor are private.
-class StackTrace {
- public:
-  // Creates a trace holding 'size' PCs (contents unset) with room for
-  // 'capacity' PCs. A capacity of 0 means "same as size".
-  static StackTrace *CreateNewEmptyStackTrace(size_t size,
-                                              size_t capacity = 0) {
-    ScopedMallocCostCenter cc("StackTrace::CreateNewEmptyStackTrace()");
-    DCHECK(g_stack_trace_free_list);
-    DCHECK(size != 0);
-    if (capacity == 0)
-      capacity = size;
-    uintptr_t *mem = g_stack_trace_free_list->GetNewMemForStackTrace(capacity);
-    DCHECK(mem);
-    StackTrace *res = new(mem) StackTrace(size, capacity);
-    return res;
-  }
-
-  // Returns the trace's memory to the free list. NULL is accepted.
-  static void Delete(StackTrace *trace) {
-    if (!trace) return;
-    DCHECK(g_stack_trace_free_list);
-    g_stack_trace_free_list->TakeStackTraceBack(
-        reinterpret_cast<uintptr_t*>(trace), trace->capacity());
-  }
-
-  size_t size() const { return size_; }
-  size_t capacity() const { return capacity_; }
-
-  void set_size(size_t size) {
-    CHECK(size <= capacity());
-    size_ = size;
-  }
-
-
-  // Stores pc in slot i. No bounds check is performed.
-  void Set(size_t i, uintptr_t pc) {
-    arr_[i] = pc;
-  }
-
-  // Returns the pc stored in slot i. No bounds check is performed.
-  uintptr_t Get(size_t i) const {
-    return arr_[i];
-  }
-
-  // Returns true if func_name matches one of the G_flags->cut_stack_below
-  // patterns. Taken by const reference to avoid copying the string on every
-  // call; callers passing a string or a C string are unaffected.
-  static bool CutStackBelowFunc(const string &func_name) {
-    for (size_t i = 0; i < G_flags->cut_stack_below.size(); i++) {
-      if (StringMatch(G_flags->cut_stack_below[i], func_name)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  // Formats up to n PCs from emb_trace, one frame per line, each prefixed
-  // with 'indent'. Stops at the first zero PC, at "(below main)" or
-  // "ThreadSanitizerStartThread" frames, and after "main" or any function
-  // matched by CutStackBelowFunc().
-  static string EmbeddedStackTraceToString(const uintptr_t *emb_trace, size_t n,
-                                           const char *indent = "    ") {
-    string res = "";
-    const int kBuffSize = 10000;
-    char *buff = new char [kBuffSize];
-    for (size_t i = 0; i < n; i++) {
-      if (!emb_trace[i]) break;
-      string rtn_and_file = PcToRtnNameAndFilePos(emb_trace[i]);
-      if (rtn_and_file.find("(below main) ") == 0 ||
-          rtn_and_file.find("ThreadSanitizerStartThread ") == 0)
-        break;
-
-      // The top frame is highlighted.
-      if (i == 0) res += c_bold;
-      if (G_flags->show_pc) {
-        snprintf(buff, kBuffSize, "%s#%-2d %p: ",
-                 indent, static_cast<int>(i),
-                 reinterpret_cast<void*>(emb_trace[i]));
-      } else {
-        snprintf(buff, kBuffSize, "%s#%-2d ", indent, static_cast<int>(i));
-      }
-      res += buff;
-
-      res += rtn_and_file;
-      if (i == 0) res += c_default;
-      res += "\n";
-
-      // don't print after main ...
-      if (rtn_and_file.find("main ") == 0)
-        break;
-      // ... and after some default functions (see ThreadSanitizerParseFlags())
-      // and some more functions specified via command line flag.
-      string rtn = NormalizeFunctionName(PcToRtnName(emb_trace[i], true));
-      if (CutStackBelowFunc(rtn))
-        break;
-    }
-    delete [] buff;
-    return res;
-  }
-
-  // Formats this trace; see EmbeddedStackTraceToString().
-  string ToString(const char *indent = "    ") const {
-    // NOTE(review): calling a member function through a NULL pointer is
-    // undefined behavior and compilers may drop this test; kept because
-    // callers outside this class may rely on it.
-    if (!this) return "NO STACK TRACE\n";
-    if (size() == 0) return "EMPTY STACK TRACE\n";
-    return EmbeddedStackTraceToString(arr_, size(), indent);
-  }
-
-  // Prints the raw PCs on one line.
-  void PrintRaw() const {
-    for (size_t i = 0; i < size(); i++) {
-      // %p expects a pointer argument, not an integer.
-      Printf("%p ", reinterpret_cast<void*>(arr_[i]));
-    }
-    Printf("\n");
-  }
-
-  // True if both traces have the same size and the same PCs.
-  static bool Equals(const StackTrace *t1, const StackTrace *t2) {
-    if (t1->size_ != t2->size_) return false;
-    for (size_t i = 0; i < t1->size_; i++) {
-      if (t1->arr_[i] != t2->arr_[i]) return false;
-    }
-    return true;
-  }
-
-  // Lexicographic ordering on the PC arrays; a proper prefix sorts first.
-  struct Less {
-    bool operator() (const StackTrace *t1, const StackTrace *t2) const {
-      size_t size = min(t1->size_, t2->size_);
-      for (size_t i = 0; i < size; i++) {
-        if (t1->arr_[i] != t2->arr_[i]) {
-          return (t1->arr_[i] < t2->arr_[i]);
-        }
-      }
-      return t1->size_ < t2->size_;
-    }
-  };
-
- private:
-  StackTrace(size_t size, size_t capacity)
-    : size_(size),
-      capacity_(capacity) {
-  }
-
-  ~StackTrace() {}
-
-  size_t size_;
-  size_t capacity_;
-  uintptr_t arr_[];  // PCs; storage follows the header in the same block.
-};
-
-
-
-// -------- Lock -------------------- {{{1
-const char *kLockAllocCC = "kLockAllocCC";
-// Per-lock state, keyed by the address of the user's lock object.
-// All Lock objects are kept in a static address -> Lock* map and are
-// never removed from it.
-class Lock {
- public:
-
-  // Returns the Lock for lock_addr in its initial (not held) state,
-  // creating it if needed. An existing Lock for the address is reused.
-  static Lock *Create(uintptr_t lock_addr) {
-    ScopedMallocCostCenter cc("LockLookup");
-//    Printf("Lock::Create: %p\n", lock_addr);
-    // Destroy(lock_addr);
-
-    // CHECK(Lookup(lock_addr) == NULL);
-    Lock *res = LookupOrCreate(lock_addr);
-    res->rd_held_ = 0;
-    res->wr_held_ = 0;
-    res->is_pure_happens_before_ = G_flags->pure_happens_before;
-    // A reused Lock may still own the stack trace of its last acquisition;
-    // release it before dropping the pointer (Delete accepts NULL).
-    StackTrace::Delete(res->last_lock_site_);
-    res->last_lock_site_ = NULL;
-    return res;
-  }
-
-  // Currently a no-op: Lock objects stay in the map.
-  static void Destroy(uintptr_t lock_addr) {
-//    Printf("Lock::Destroy: %p\n", lock_addr);
-  //  map_.erase(lock_addr);
-  }
-
-  // Returns the Lock for lock_addr, creating it on first use.
-  static NOINLINE Lock *LookupOrCreate(uintptr_t lock_addr) {
-    ScopedMallocCostCenter cc("LockLookup");
-    Lock **lock = &(*map_)[lock_addr];
-    if (*lock == NULL) {
-//      Printf("Lock::LookupOrCreate: %p\n", lock_addr);
-      ScopedMallocCostCenter cc_lock("new Lock");
-      // The map entry already exists here, so size() yields ids 1, 2, ...
-      *lock = new Lock(lock_addr, map_->size());
-    }
-    return *lock;
-  }
-
-  // Returns the Lock for lock_addr, or NULL if there is none.
-  static NOINLINE Lock *Lookup(uintptr_t lock_addr) {
-    ScopedMallocCostCenter cc("LockLookup");
-    Map::iterator it = map_->find(lock_addr);
-    if (it == map_->end()) return NULL;
-    return it->second;
-  }
-
-  int       rd_held()   const { return rd_held_; }
-  int       wr_held()   const { return wr_held_; }
-  uintptr_t lock_addr() const { return lock_addr_; }
-  LID       lid()       const { return lid_; }
-  bool is_pure_happens_before() const { return is_pure_happens_before_; }
-
-  // When a lock is pure happens-before, we need to create hb arcs
-  // between all Unlock/Lock pairs except RdUnlock/RdLock.
-  // For that purpose have two IDs on which we signal/wait.
-  // One id is the lock_addr itself, the second id is derived
-  // from lock_addr.
-  uintptr_t wr_signal_addr() const { return lock_addr(); }
-  uintptr_t rd_signal_addr() const { return lock_addr() + 1; }
-
-
-  void set_is_pure_happens_before(bool x) { is_pure_happens_before_ = x; }
-
-  // Records a write acquisition by 'tid'. Recursive acquisition is allowed
-  // only by the thread that already holds the lock. Takes ownership of
-  // lock_site and releases the previously stored one.
-  void WrLock(TID tid, StackTrace *lock_site) {
-    CHECK(!rd_held_);
-    if (wr_held_ == 0) {
-      thread_holding_me_in_write_mode_ = tid;
-    } else {
-      CHECK(thread_holding_me_in_write_mode_ == tid);
-    }
-    wr_held_++;
-    StackTrace::Delete(last_lock_site_);
-    last_lock_site_ = lock_site;
-  }
-
-  // Records one write release.
-  void WrUnlock() {
-    CHECK(!rd_held_);
-    CHECK(wr_held_ > 0);
-    wr_held_--;
-  }
-
-  // Records a read acquisition. Takes ownership of lock_site and releases
-  // the previously stored one.
-  void RdLock(StackTrace *lock_site) {
-    CHECK(!wr_held_);
-    rd_held_++;
-    StackTrace::Delete(last_lock_site_);
-    last_lock_site_ = lock_site;
-  }
-
-  // Records one read release.
-  void RdUnlock() {
-    CHECK(!wr_held_);
-    CHECK(rd_held_);
-    rd_held_--;
-  }
-
-  // The name pointer is stored as is; the string is not copied.
-  void set_name(const char *name) { name_ = name; }
-  const char *name() const { return name_; }
-
-  // Returns "L<lid>", followed by a space and the name if one was set.
-  string ToString() const {
-    string res;
-    char buff[100];
-    snprintf(buff, sizeof(buff), "L%d", lid_.raw());
-    // do we need to print the address?
-    // reinterpret_cast<void*>(lock_addr()));
-    res = buff;
-    if (name()) {
-      res += string(" ") + name();
-    }
-    return res;
-  }
-
-  // Returns the Lock with the given id, or NULL if there is none.
-  static Lock *LIDtoLock(LID lid) {
-    // slow, but needed only for reports.
-    for (Map::iterator it = map_->begin(); it != map_->end(); ++it) {
-      Lock *l = it->second;
-      if (l->lid_ == lid) {
-        return l;
-      }
-    }
-    return NULL;
-  }
-
-  // Like the member ToString(), looked up by id. The lock must exist.
-  static string ToString(LID lid) {
-    Lock *lock = LIDtoLock(lid);
-    CHECK(lock);
-    return lock->ToString();
-  }
-
-  // Reports the lock either as a bare "L<lid>" line or, with_context,
-  // together with its address and the stack trace of its last acquisition.
-  static void ReportLockWithOrWithoutContext(LID lid, bool with_context) {
-    if (!with_context) {
-      Report("   L%d\n", lid.raw());
-      return;
-    }
-    Lock *lock = LIDtoLock(lid);
-    CHECK(lock);
-    if (lock->last_lock_site_) {
-      // %p expects a pointer argument, not an integer.
-      Report("   %s (%p)\n%s",
-             lock->ToString().c_str(),
-             reinterpret_cast<void*>(lock->lock_addr_),
-             lock->last_lock_site_->ToString().c_str());
-    } else {
-      Report("   %s. This lock was probably destroyed"
-                 " w/o calling Unlock()\n", lock->ToString().c_str());
-    }
-  }
-
-  // Must be called once before any other static member is used.
-  static void InitClassMembers() {
-    map_ = new Lock::Map;
-  }
-
- private:
-  Lock(uintptr_t lock_addr, int32_t lid)
-    : lock_addr_(lock_addr),
-      lid_(lid),
-      rd_held_(0),
-      wr_held_(0),
-      is_pure_happens_before_(G_flags->pure_happens_before),
-      last_lock_site_(0),
-      name_(NULL) {
-  }
-
-  // Data members
-  uintptr_t lock_addr_;
-  LID       lid_;
-  int       rd_held_;   // number of outstanding read acquisitions
-  int       wr_held_;   // number of outstanding write acquisitions
-  bool      is_pure_happens_before_;
-  StackTrace *last_lock_site_;  // owned; NULL if none
-  const char *name_;
-  TID       thread_holding_me_in_write_mode_;  // meaningful while wr_held_ > 0
-
-  // Static members
-  typedef map<uintptr_t, Lock*> Map;
-  static Map *map_;
-};
-
-
-Lock::Map *Lock::map_;
-
-// Formats a set of lock ids as a comma-separated list like "L123, L234".
-// An empty set yields an empty string.
-static string SetOfLocksToString(const set<LID> &locks) {
-  string text;
-  bool is_first = true;
-  set<LID>::const_iterator cur = locks.begin();
-  while (cur != locks.end()) {
-    if (!is_first)
-      text += ", ";
-    is_first = false;
-    LID id = *cur;
-    char item[100];
-    snprintf(item, sizeof(item), "L%d", id.raw());
-    text += item;
-    ++cur;
-  }
-  return text;
-}
-
-// -------- FixedArray--------------- {{{1
-// Array of T whose size is chosen at construction time.
-// Up to SizeLimit elements are served from storage embedded in the object
-// itself; larger arrays are allocated with new[] and freed in the
-// destructor. Element access is not bounds-checked.
-template <typename T, size_t SizeLimit = 1024>
-class FixedArray {
- public:
-  explicit INLINE FixedArray(size_t array_size)
-      : size_(array_size),
-        array_((array_size <= SizeLimit
-                ? alloc_space_
-                : new T[array_size])) { }
-
-  ~FixedArray() {
-    // Only heap storage is owned through array_.
-    if (array_ != alloc_space_) {
-      delete[] array_;
-    }
-  }
-
-  T* begin() { return array_; }
-  T& operator[](int i)             { return array_[i]; }
-
- private:
-  // Not copyable: a copy would either point at the source object's embedded
-  // storage or delete[] the same heap block twice. Declared, never defined.
-  FixedArray(const FixedArray &);
-  void operator=(const FixedArray &);
-
-  const size_t size_;
-  T* array_;
-  T alloc_space_[SizeLimit];
-};
-
-// -------- LockSet ----------------- {{{1
-// Interning of lock sets. A lock set is identified by an LSID:
-//   * the empty set and singleton sets are encoded directly in the LSID
-//     (a singleton's LSID has the same raw value as its lock's LID);
-//   * a set with several locks gets a negative raw id -k, where k is the
-//     1-based position of the set in vec_.
-// The class has only static members; small caches speed up Add, Remove and
-// IntersectionIsEmpty.
-class LockSet {
- public:
-  // Returns the id of (lsid + lock).
-  NOINLINE static LSID Add(LSID lsid, Lock *lock) {
-    ScopedMallocCostCenter cc("LockSetAdd");
-    LID lid = lock->lid();
-    if (lsid.IsEmpty()) {
-      // adding to an empty lock set
-      G_stats->ls_add_to_empty++;
-      return LSID(lid.raw());
-    }
-    int cache_res;
-    if (ls_add_cache_->Lookup(lsid.raw(), lid.raw(), &cache_res)) {
-      G_stats->ls_add_cache_hit++;
-      return LSID(cache_res);
-    }
-    LSID res;
-    if (lsid.IsSingleton()) {
-      LSSet set(lsid.GetSingleton(), lid);
-      G_stats->ls_add_to_singleton++;
-      res = ComputeId(set);
-    } else {
-      LSSet set(Get(lsid), lid);
-      G_stats->ls_add_to_multi++;
-      res = ComputeId(set);
-    }
-    ls_add_cache_->Insert(lsid.raw(), lid.raw(), res.raw());
-    return res;
-  }
-
-  // If lock is present in lsid, set new_lsid to (lsid \ lock) and return true.
-  // Otherwise set new_lsid to lsid and return false.
-  NOINLINE static bool Remove(LSID lsid, Lock *lock, LSID *new_lsid) {
-    *new_lsid = lsid;
-    if (lsid.IsEmpty()) return false;
-    LID lid = lock->lid();
-
-    if (lsid.IsSingleton()) {
-      // removing the only lock -> LSID(0)
-      if (lsid.GetSingleton() != lid) return false;
-      G_stats->ls_remove_from_singleton++;
-      *new_lsid = LSID(0);
-      return true;
-    }
-
-    // Only successful removals are inserted into the cache (see below),
-    // so a hit implies the lock is present.
-    int cache_res;
-    if (ls_rem_cache_->Lookup(lsid.raw(), lid.raw(), &cache_res)) {
-      G_stats->ls_rem_cache_hit++;
-      *new_lsid = LSID(cache_res);
-      return true;
-    }
-
-    LSSet &prev_set = Get(lsid);
-    if (!prev_set.has(lid)) return false;
-    LSSet set(prev_set, LSSet::REMOVE, lid);
-    CHECK(set.size() == prev_set.size() - 1);
-    G_stats->ls_remove_from_multi++;
-    LSID res = ComputeId(set);
-    ls_rem_cache_->Insert(lsid.raw(), lid.raw(), res.raw());
-    *new_lsid = res;
-    return true;
-  }
-
-  // Returns true if the two lock sets have no lock in common.
-  NOINLINE static bool IntersectionIsEmpty(LSID lsid1, LSID lsid2) {
-    // at least one empty
-    if (lsid1.IsEmpty() || lsid2.IsEmpty())
-      return true;  // empty
-
-    // both singletons
-    if (lsid1.IsSingleton() && lsid2.IsSingleton()) {
-      return lsid1 != lsid2;
-    }
-
-    // first is singleton, second is not
-    if (lsid1.IsSingleton()) {
-      const LSSet &set2 = Get(lsid2);
-      return set2.has(LID(lsid1.raw())) == false;
-    }
-
-    // second is singleton, first is not
-    if (lsid2.IsSingleton()) {
-      const LSSet &set1 = Get(lsid1);
-      return set1.has(LID(lsid2.raw())) == false;
-    }
-
-    // LockSets are equal and not empty
-    if (lsid1 == lsid2)
-      return false;
-
-    // both are not singletons - slow path.
-    bool ret = true,
-         cache_hit = false;
-    DCHECK(lsid2.raw() < 0);
-    if (ls_intersection_cache_->Lookup(lsid1.raw(), -lsid2.raw(), &ret)) {
-      // In debug mode fall through and verify the cached answer.
-      if (!DEBUG_MODE)
-        return ret;
-      cache_hit = true;
-    }
-    const LSSet &set1 = Get(lsid1);
-    const LSSet &set2 = Get(lsid2);
-
-    FixedArray<LID> intersection(min(set1.size(), set2.size()));
-    LID *end = set_intersection(set1.begin(), set1.end(),
-                            set2.begin(), set2.end(),
-                            intersection.begin());
-    DCHECK(!cache_hit || (ret == (end == intersection.begin())));
-    ret = (end == intersection.begin());
-    ls_intersection_cache_->Insert(lsid1.raw(), -lsid2.raw(), ret);
-    return ret;
-  }
-
-  // Returns true if at least one lock in lsid is not pure happens-before.
-  static bool HasNonPhbLocks(LSID lsid) {
-    if (lsid.IsEmpty())
-      return false;
-    if (lsid.IsSingleton())
-      return !Lock::LIDtoLock(LID(lsid.raw()))->is_pure_happens_before();
-
-    LSSet &set = Get(lsid);
-    for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it)
-      if (!Lock::LIDtoLock(*it)->is_pure_happens_before())
-        return true;
-    return false;
-  }
-
-  // Returns the set as "{}", "{L1}" or "{L1, L2, ...}".
-  static string ToString(LSID lsid) {
-    if (lsid.IsEmpty()) {
-      return "{}";
-    } else if (lsid.IsSingleton()) {
-      return "{" + Lock::ToString(lsid.GetSingleton()) + "}";
-    }
-    const LSSet &set = Get(lsid);
-    string res = "{";
-    for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
-      if (it != set.begin()) res += ", ";
-      res += Lock::ToString(*it);
-    }
-    res += "}";
-    return res;
-  }
-
-  // Reports 'descr' followed by every lock in lsid. A lock is reported with
-  // its context only if it is not yet in *locks_reported; every reported
-  // lock is then added to *locks_reported. Does nothing for an empty set.
-  static void ReportLockSetWithContexts(LSID lsid,
-                                        set<LID> *locks_reported,
-                                        const char *descr) {
-    if (lsid.IsEmpty()) return;
-    Report("%s%s%s\n", c_green, descr, c_default);
-    if (lsid.IsSingleton()) {
-      LID lid = lsid.GetSingleton();
-      Lock::ReportLockWithOrWithoutContext(lid,
-                                           locks_reported->count(lid) == 0);
-      locks_reported->insert(lid);
-    } else {
-      const LSSet &set = Get(lsid);
-      for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
-        LID lid = *it;
-        Lock::ReportLockWithOrWithoutContext(lid,
-                                     locks_reported->count(lid) == 0);
-        locks_reported->insert(lid);
-      }
-    }
-  }
-
-  // Inserts every lock of lsid into *locks.
-  static void AddLocksToSet(LSID lsid, set<LID> *locks) {
-    if (lsid.IsEmpty()) return;
-    if (lsid.IsSingleton()) {
-      locks->insert(lsid.GetSingleton());
-    } else {
-      const LSSet &set = Get(lsid);
-      for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
-        locks->insert(*it);
-      }
-    }
-  }
-
-
-  // Must be called once before any other member is used.
-  static void InitClassMembers() {
-    map_ = new LockSet::Map;
-    vec_ = new LockSet::Vec;
-    ls_add_cache_ = new LSCache;
-    // Allocate each cache exactly once: a second assignment to
-    // ls_rem_cache_ would leak the first cache. ls_int_cache_ is not used
-    // by this class and stays NULL.
-    ls_rem_cache_ = new LSCache;
-    ls_intersection_cache_ = new LSIntersectionCache;
-  }
-
- private:
-  // No instances are allowed.
-  LockSet() { }
-
-  typedef DenseMultimap<LID, 3> LSSet;
-
-  // Returns the stored set for a multi-lock LSID (negative raw id).
-  static LSSet &Get(LSID lsid) {
-    ScopedMallocCostCenter cc(__FUNCTION__);
-    int idx = -lsid.raw() - 1;
-    DCHECK(idx >= 0);
-    DCHECK(idx < static_cast<int>(vec_->size()));
-    return (*vec_)[idx];
-  }
-
-  // Returns the id of a non-empty set, registering the set on first use.
-  static LSID ComputeId(const LSSet &set) {
-    CHECK(set.size() > 0);
-    if (set.size() == 1) {
-      // singleton lock set has lsid == lid.
-      return LSID(set.begin()->raw());
-    }
-    DCHECK(map_);
-    DCHECK(vec_);
-    // multiple locks.
-    ScopedMallocCostCenter cc("LockSet::ComputeId");
-    int32_t *id = &(*map_)[set];
-    if (*id == 0) {
-      // New set: operator[] has just inserted it with id 0, so map_->size()
-      // is its 1-based index, which also matches its position in vec_.
-      vec_->push_back(set);
-      *id = map_->size();
-      if      (set.size() == 2) G_stats->ls_size_2++;
-      else if (set.size() == 3) G_stats->ls_size_3++;
-      else if (set.size() == 4) G_stats->ls_size_4++;
-      else if (set.size() == 5) G_stats->ls_size_5++;
-      else                      G_stats->ls_size_other++;
-      // Log growth at every power of two starting from 4096.
-      if (*id >= 4096 && ((*id & (*id - 1)) == 0)) {
-        Report("INFO: %d LockSet IDs have been allocated "
-               "(2: %ld 3: %ld 4: %ld 5: %ld o: %ld)\n",
-               *id,
-               G_stats->ls_size_2, G_stats->ls_size_3,
-               G_stats->ls_size_4, G_stats->ls_size_5,
-               G_stats->ls_size_other
-               );
-      }
-    }
-    return LSID(-*id);
-  }
-
-  // set -> positive id k; the LSID of the set is -k.
-  typedef map<LSSet, int32_t> Map;
-  static Map *map_;
-
-  static const char *kLockSetVecAllocCC;
-  // Sets in order of registration; the set with id k is at index k - 1.
-  typedef vector<LSSet> Vec;
-  static Vec *vec_;
-
-//  static const int kPrimeSizeOfLsCache = 307;
-//  static const int kPrimeSizeOfLsCache = 499;
-  static const int kPrimeSizeOfLsCache = 1021;
-  typedef IntPairToIntCache<kPrimeSizeOfLsCache> LSCache;
-  static LSCache *ls_add_cache_;
-  static LSCache *ls_rem_cache_;
-  static LSCache *ls_int_cache_;  // unused; see InitClassMembers().
-  typedef IntPairToBoolCache<kPrimeSizeOfLsCache> LSIntersectionCache;
-  static LSIntersectionCache *ls_intersection_cache_;
-};
-
-LockSet::Map *LockSet::map_;
-LockSet::Vec *LockSet::vec_;
-const char *LockSet::kLockSetVecAllocCC = "kLockSetVecAllocCC";
-LockSet::LSCache *LockSet::ls_add_cache_;
-LockSet::LSCache *LockSet::ls_rem_cache_;
-LockSet::LSCache *LockSet::ls_int_cache_;
-LockSet::LSIntersectionCache *LockSet::ls_intersection_cache_;
-
-
-// Formats a (read, write) lock set pair. When both sets are the same the
-// result is "L{...}", otherwise "WR-L{...}/RD-L{...}".
-static string TwoLockSetsToString(LSID rd_lockset, LSID wr_lockset) {
-  const bool same_sets = (rd_lockset == wr_lockset);
-  string text = same_sets ? "L" : "WR-L";
-  text += LockSet::ToString(wr_lockset);
-  if (same_sets)
-    return text;
-  text += "/RD-L";
-  text += LockSet::ToString(rd_lockset);
-  return text;
-}
-
-
-
-
-// -------- VTS ------------------ {{{1
-// Reference-counted vector time stamp: a header followed by an array of
-// (tid, clk) pairs. Join() and HappensBefore() walk the arrays in
-// increasing tid order. Objects are created with Create() (or the helpers
-// built on it) and released with Unref(); small ones come from per-size
-// FreeLists, big ones from new[].
-class VTS {
- public:
-  // Bytes needed for a VTS with 'size' entries.
-  static size_t MemoryRequiredForOneVts(size_t size) {
-    return sizeof(VTS) + size * sizeof(TS);
-  }
-
-  // Rounds size up (to a multiple of 8, 16 or 32 depending on its
-  // magnitude) so that VTSs of similar sizes share a FreeList.
-  static size_t RoundUpSizeForEfficientUseOfFreeList(size_t size) {
-    if (size < 32) return size;
-    if (size < 64) return (size + 7) & ~7;
-    if (size < 128) return (size + 15) & ~15;
-    return (size + 31) & ~31;
-  }
-
-  // Creates a VTS with 'size' entries (contents unset) and ref count 1.
-  static VTS *Create(size_t size) {
-    DCHECK(size > 0);
-    void *mem;
-    size_t rounded_size = RoundUpSizeForEfficientUseOfFreeList(size);
-    DCHECK(size <= rounded_size);
-    if (rounded_size <= kNumberOfFreeLists) {
-      // Small chunk, use FreeList.
-      ScopedMallocCostCenter cc("VTS::Create (from free list)");
-      mem = free_lists_[rounded_size]->Allocate();
-      G_stats->vts_create_small++;
-    } else {
-      // Large chunk, use new/delete instead of FreeList.
-      ScopedMallocCostCenter cc("VTS::Create (from new[])");
-      mem = new int8_t[MemoryRequiredForOneVts(size)];
-      G_stats->vts_create_big++;
-    }
-    VTS *res = new(mem) VTS(size);
-    G_stats->vts_total_create += size;
-    return res;
-  }
-
-  // Drops one reference and frees the VTS when it was the last one.
-  // NULL is accepted.
-  static void Unref(VTS *vts) {
-    if (!vts) return;
-    CHECK_GT(vts->ref_count_, 0);
-    if (AtomicDecrementRefcount(&vts->ref_count_) == 0) {
-      size_t size = vts->size_;  // can't use vts->size().
-      size_t rounded_size = RoundUpSizeForEfficientUseOfFreeList(size);
-      if (rounded_size <= kNumberOfFreeLists) {
-        free_lists_[rounded_size]->Deallocate(vts);
-        G_stats->vts_delete_small++;
-      } else {
-        G_stats->vts_delete_big++;
-        // A big VTS was placement-constructed inside a new int8_t[] buffer
-        // (see Create), so it must be released with the matching delete[]
-        // on that buffer type rather than with a scalar 'delete vts'.
-        vts->~VTS();
-        delete [] reinterpret_cast<int8_t*>(vts);
-      }
-      G_stats->vts_total_delete += rounded_size;
-    }
-  }
-
-  // Creates a VTS with the single entry (tid, clk).
-  static VTS *CreateSingleton(TID tid, int32_t clk = 1) {
-    VTS *res = Create(1);
-    res->arr_[0].tid = tid.raw();
-    res->arr_[0].clk = clk;
-    return res;
-  }
-
-  // Takes one more reference to this object; no copy is made.
-  VTS *Clone() {
-    G_stats->vts_clone++;
-    AtomicIncrementRefcount(&ref_count_);
-    return this;
-  }
-
-  // Returns a copy of vts with the clock of id_to_tick incremented.
-  // id_to_tick must be present in vts.
-  static VTS *CopyAndTick(const VTS *vts, TID id_to_tick) {
-    CHECK(vts->ref_count_);
-    VTS *res = Create(vts->size());
-    bool found = false;
-    for (size_t i = 0; i < res->size(); i++) {
-      res->arr_[i] = vts->arr_[i];
-      if (res->arr_[i].tid == id_to_tick.raw()) {
-        res->arr_[i].clk++;
-        found = true;
-      }
-    }
-    CHECK(found);
-    return res;
-  }
-
-  // Returns a new VTS holding, for every tid present in either input, the
-  // larger of the two clocks. The inputs are merged in tid order.
-  static VTS *Join(const VTS *vts_a, const VTS *vts_b) {
-    CHECK(vts_a->ref_count_);
-    CHECK(vts_b->ref_count_);
-    FixedArray<TS> result_ts(vts_a->size() + vts_b->size());
-    TS *t = result_ts.begin();
-    const TS *a = &vts_a->arr_[0];
-    const TS *b = &vts_b->arr_[0];
-    const TS *a_max = a + vts_a->size();
-    const TS *b_max = b + vts_b->size();
-    while (a < a_max && b < b_max) {
-      if (a->tid < b->tid) {
-        *t = *a;
-        a++;
-        t++;
-      } else if (a->tid > b->tid) {
-        *t = *b;
-        b++;
-        t++;
-      } else {
-        if (a->clk >= b->clk) {
-          *t = *a;
-        } else {
-          *t = *b;
-        }
-        a++;
-        b++;
-        t++;
-      }
-    }
-    // Copy whatever is left of the longer input.
-    while (a < a_max) {
-      *t = *a;
-      a++;
-      t++;
-    }
-    while (b < b_max) {
-      *t = *b;
-      b++;
-      t++;
-    }
-
-    VTS *res = VTS::Create(t - result_ts.begin());
-    for (size_t i = 0; i < res->size(); i++) {
-      res->arr_[i] = result_ts[i];
-    }
-    return res;
-  }
-
-  // Returns the clock recorded for tid, or 0 if tid is not present.
-  int32_t clk(TID tid) const {
-    // TODO(dvyukov): this function is sub-optimal,
-    // we only need thread's own clock.
-    for (size_t i = 0; i < size_; i++) {
-      if (arr_[i].tid == tid.raw()) {
-        return arr_[i].clk;
-      }
-    }
-    return 0;
-  }
-
-  static INLINE void FlushHBCache() {
-    hb_cache_->Flush();
-  }
-
-  // Same as HappensBefore(), with results cached by the pair of uniq ids.
-  static INLINE bool HappensBeforeCached(const VTS *vts_a, const VTS *vts_b) {
-    bool res = false;
-    if (hb_cache_->Lookup(vts_a->uniq_id_, vts_b->uniq_id_, &res)) {
-      G_stats->n_vts_hb_cached++;
-      DCHECK(res == HappensBefore(vts_a, vts_b));
-      return res;
-    }
-    res = HappensBefore(vts_a, vts_b);
-    hb_cache_->Insert(vts_a->uniq_id_, vts_b->uniq_id_, res);
-    return res;
-  }
-
-  // return true if vts_a happens-before vts_b.
-  static NOINLINE bool HappensBefore(const VTS *vts_a, const VTS *vts_b) {
-    CHECK(vts_a->ref_count_);
-    CHECK(vts_b->ref_count_);
-    G_stats->n_vts_hb++;
-    const TS *a = &vts_a->arr_[0];
-    const TS *b = &vts_b->arr_[0];
-    const TS *a_max = a + vts_a->size();
-    const TS *b_max = b + vts_b->size();
-    bool a_less_than_b = false;
-    while (a < a_max && b < b_max) {
-      if (a->tid < b->tid) {
-        // a->tid is not present in b.
-        return false;
-      } else if (a->tid > b->tid) {
-        // b->tid is not present in a.
-        a_less_than_b = true;
-        b++;
-      } else {
-        // this tid is present in both VTSs. Compare clocks.
-        if (a->clk > b->clk) return false;
-        if (a->clk < b->clk) a_less_than_b = true;
-        a++;
-        b++;
-      }
-    }
-    if (a < a_max) {
-      // Some tids are present in a and not in b
-      return false;
-    }
-    if (b < b_max) {
-      // Some tids are present in b and not in a.
-      return true;
-    }
-    return a_less_than_b;
-  }
-
-  size_t size() const {
-    DCHECK(ref_count_);
-    return size_;
-  }
-
-  // Returns the VTS as text, e.g. "[0:4; 3:6; 4:2;]".
-  string ToString() const {
-    DCHECK(ref_count_);
-    string res = "[";
-    for (size_t i = 0; i < size(); i++) {
-      char buff[100];
-      snprintf(buff, sizeof(buff), "%d:%d;", arr_[i].tid, arr_[i].clk);
-      if (i) res += " ";
-      res += buff;
-    }
-    return res + "]";
-  }
-
-  void print(const char *name) const {
-    string str = ToString();
-    Printf("%s: %s\n", name, str.c_str());
-  }
-
-  // Prints HappensBefore() for every ordered pair of a few sample VTSs.
-  static void TestHappensBefore() {
-    // TODO(kcc): need more tests here...
-    const char *test_vts[] = {
-      "[0:1;]",
-      "[0:4; 2:1;]",
-      "[0:4; 2:2; 4:1;]",
-      "[0:4; 3:2; 4:1;]",
-      "[0:4; 3:2; 4:2;]",
-      "[0:4; 3:3; 4:1;]",
-      NULL
-    };
-
-    for (int i = 0; test_vts[i]; i++) {
-      VTS *vts1 = Parse(test_vts[i]);
-      // Parse() returns NULL on failure (and always, while it is stubbed
-      // out); HappensBefore() would dereference it.
-      if (!vts1) continue;
-      for (int j = 0; test_vts[j]; j++) {
-        VTS *vts2 = Parse(test_vts[j]);
-        if (!vts2) continue;
-        bool hb  = HappensBefore(vts1, vts2);
-        Printf("HB = %d\n   %s\n   %s\n", static_cast<int>(hb),
-               vts1->ToString().c_str(),
-               vts2->ToString().c_str());
-        // VTS memory comes from Create(), so it must go back through
-        // Unref() rather than a plain delete.
-        Unref(vts2);
-      }
-      Unref(vts1);
-    }
-  }
-
-  // Ad-hoc self test.
-  // NOTE(review): while Parse() is stubbed out, CHECK(x1) below fails.
-  static void Test() {
-    Printf("VTS::test();\n");
-    VTS *v1 = CreateSingleton(TID(0));
-    VTS *v2 = CreateSingleton(TID(1));
-    VTS *v3 = CreateSingleton(TID(2));
-    VTS *v4 = CreateSingleton(TID(3));
-
-    VTS *v12 = Join(v1, v2);
-    v12->print("v12");
-    VTS *v34 = Join(v3, v4);
-    v34->print("v34");
-
-    VTS *x1 = Parse("[0:4; 3:6; 4:2;]");
-    CHECK(x1);
-    x1->print("x1");
-    TestHappensBefore();
-  }
-
-  // Parse VTS string in the form "[0:4; 3:6; 4:2;]".
-  // Returns NULL on malformed input. Currently disabled: always NULL.
-  static VTS *Parse(const char *str) {
-#if 1  // TODO(kcc): need sscanf in valgrind
-    return NULL;
-#else
-    vector<TS> vec;
-    if (!str) return NULL;
-    if (str[0] != '[') return NULL;
-    str++;
-    int tid = 0, clk = 0;
-    int consumed = 0;
-    while (sscanf(str, "%d:%d;%n", &tid, &clk, &consumed) > 0) {
-      TS ts;
-      ts.tid = TID(tid);
-      ts.clk = clk;
-      vec.push_back(ts);
-      str += consumed;
-      // Printf("%d:%d\n", tid, clk);
-    }
-    if (*str != ']') return NULL;
-    VTS *res = Create(vec.size());
-    for (size_t i = 0; i < vec.size(); i++) {
-      res->arr_[i] = vec[i];
-    }
-    return res;
-#endif
-  }
-
-  // Must be called once before any VTS is created.
-  static void InitClassMembers() {
-    hb_cache_ = new HBCache;
-    free_lists_ = new FreeList *[kNumberOfFreeLists+1];
-    free_lists_[0] = 0;
-    for (size_t  i = 1; i <= kNumberOfFreeLists; i++) {
-      // Lists for bigger objects get fewer objects per chunk.
-      free_lists_[i] = new FreeList(MemoryRequiredForOneVts(i),
-                                    (kNumberOfFreeLists * 4) / i);
-    }
-  }
-
-  int32_t uniq_id() const { return uniq_id_; }
-
- private:
-  explicit VTS(size_t size)
-    : ref_count_(1),
-      size_(size) {
-    uniq_id_counter_++;
-    // If we've got overflow, we are in trouble, need to have 64-bits...
-    CHECK_GT(uniq_id_counter_, 0);
-    uniq_id_ = uniq_id_counter_;
-  }
-  ~VTS() {}
-
-  // One (thread id, clock) pair.
-  struct TS {
-    int32_t tid;
-    int32_t clk;
-  };
-
-
-  // data members
-  int32_t ref_count_;
-  int32_t uniq_id_;
-  size_t size_;
-  TS     arr_[];  // array of size_ elements.
-
-
-  // static data members
-  static int32_t uniq_id_counter_;
-  static const int kCacheSize = 4999;  // Has to be prime.
-  typedef IntPairToBoolCache<kCacheSize> HBCache;
-  static HBCache *hb_cache_;
-
-  static const size_t kNumberOfFreeLists = 512;  // Must be power of two.
-//  static const size_t kNumberOfFreeLists = 64; // Must be power of two.
-  // Array of kNumberOfFreeLists + 1 elements indexed by rounded VTS size;
-  // element 0 is unused.
-  static FreeList **free_lists_;
-};
-
-int32_t VTS::uniq_id_counter_;
-VTS::HBCache *VTS::hb_cache_;
-FreeList **VTS::free_lists_;
-
-
-// This class is somewhat similar to VTS,
-// but it's mutable, not reference counted and not sorted.
-// Growable, unsorted list of (tid, clk) pairs stored in a malloc'ed buffer.
-// There is no destructor: call reset() to release the buffer.
-class VectorClock {
- public:
-  VectorClock()
-      : size_(),
-        clock_()
-  {
-  }
-
-  // Frees the buffer and makes the clock empty.
-  void reset() {
-    free(clock_);
-    size_ = 0;
-    clock_ = NULL;
-  }
-
-  // Returns the clock recorded for tid, or 0 if tid is not present.
-  int32_t clock(TID tid) const {
-    for (size_t i = 0; i != size_; i += 1) {
-      if (clock_[i].tid == tid.raw()) {
-        return clock_[i].clk;
-      }
-    }
-    return 0;
-  }
-
-  // Sets the clock of tid to clk, appending a new entry if tid is absent.
-  void update(TID tid, int32_t clk) {
-    for (size_t i = 0; i != size_; i += 1) {
-      if (clock_[i].tid == tid.raw()) {
-        clock_[i].clk = clk;
-        return;
-      }
-    }
-    // Grow by one entry. The result of realloc is checked before it
-    // replaces clock_: on failure realloc returns NULL and leaves the old
-    // buffer allocated, so assigning it directly would lose that buffer and
-    // the writes below would go through a NULL pointer.
-    TS *grown = (TS*)realloc(clock_, (size_ + 1) * sizeof(TS));
-    CHECK(grown);
-    clock_ = grown;
-    clock_[size_].tid = tid.raw();
-    clock_[size_].clk = clk;
-    size_ += 1;
-  }
-
- private:
-  // One (thread id, clock) pair.
-  struct TS {
-    int32_t tid;
-    int32_t clk;
-  };
-
-  size_t    size_;   // number of entries in clock_
-  TS*       clock_;  // malloc'ed array of size_ entries, or NULL
-};
-
-
-// -------- Mask -------------------- {{{1
-// A bit mask as wide as a pointer (32-bits on 32-bit arch and 64-bits on
-// 64-bit arch). Bit indices are zero-based and ranges are half-open: [a,b).
-class Mask {
- public:
-  static const uintptr_t kOne = 1;
-  static const uintptr_t kNBits = sizeof(uintptr_t) * 8;
-  static const uintptr_t kNBitsLog = kNBits == 32 ? 5 : 6;
-
-  Mask() : m_(0) {}
-  Mask(const Mask &m) : m_(m.m_) { }
-  explicit Mask(uintptr_t m) : m_(m) { }
-
-  // Single-bit accessors.
-  INLINE bool Get(uintptr_t idx) const   { return (m_ & (kOne << idx)) != 0; }
-  INLINE void Set(uintptr_t idx)   { m_ |= kOne << idx; }
-  INLINE void Clear(uintptr_t idx) { m_ &= ~(kOne << idx); }
-  INLINE bool Empty() const {return m_ == 0; }
-
-  // Clear bits in range [a,b) and return old [a,b) range.
-  INLINE Mask ClearRangeAndReturnOld(uintptr_t a, uintptr_t b) {
-    const uintptr_t mask = RangeMask(a, b);
-    const uintptr_t res = m_ & mask;
-    m_ &= ~mask;
-    return Mask(res);
-  }
-
-  // Clear bits in range [a,b).
-  INLINE void ClearRange(uintptr_t a, uintptr_t b) {
-    ClearRangeAndReturnOld(a, b);
-  }
-
-  // Set bits in range [a,b).
-  INLINE void SetRange(uintptr_t a, uintptr_t b) {
-    m_ |= RangeMask(a, b);
-  }
-
-  // Return the bits of range [a,b), left in place (i.e. not shifted down).
-  INLINE uintptr_t GetRange(uintptr_t a, uintptr_t b) const {
-    return m_ & RangeMask(a, b);
-  }
-
-  // Get index of some set bit (assumes mask is non zero).
-  size_t GetSomeSetBit() const {
-    DCHECK(m_);
-    size_t ret;
-#ifdef __GNUC__
-    // Use the builtin whose operand is at least as wide as uintptr_t, so the
-    // upper half of m_ is not dropped where long is narrower than a pointer.
-    ret = sizeof(uintptr_t) <= sizeof(unsigned long)
-        ? __builtin_ctzl(m_)
-        : __builtin_ctzll(m_);
-#elif defined(_MSC_VER)
-    unsigned long index;
-    DCHECK(sizeof(uintptr_t) == 4);
-    // _BitScanReverse scans from the most significant bit, so this branch
-    // reports a different (but still set) bit than the GCC branch.
-    _BitScanReverse(&index, m_);
-    ret = index;
-#else
-# error "Unsupported"
-#endif
-    DCHECK(this->Get(ret));
-    return ret;
-  }
-
-  // Number of set bits. Only implemented when VGO_linux is defined;
-  // elsewhere it fails a CHECK.
-  size_t PopCount() const {
-#ifdef VGO_linux
-    return __builtin_popcountl(m_);
-#else
-    CHECK(0);
-    return 0;
-#endif
-  }
-
-  // Set-like operations on whole masks.
-  void Subtract(Mask m) { m_ &= ~m.m_; }
-  void Union(Mask m) { m_ |= m.m_; }
-
-  static Mask Intersection(Mask m1, Mask m2) { return Mask(m1.m_ & m2.m_); }
-
-
-  // Clear all bits.
-  void Clear() { m_ = 0; }
-
-
-  // Returns kNBits characters of '0'/'1'; bit 0 comes first.
-  string ToString() const {
-    char buff[kNBits+1];
-    for (uintptr_t i = 0; i < kNBits; i++) {
-      buff[i] = Get(i) ? '1' : '0';
-    }
-    buff[kNBits] = 0;
-    return buff;
-  }
-
-  // Tiny smoke test: prints a mask before and after clearing it.
-  static void Test() {
-    Mask m;
-    m.Set(2);
-    Printf("%s\n", m.ToString().c_str());
-    m.ClearRange(0, kNBits);
-    Printf("%s\n", m.ToString().c_str());
-  }
-
- private:
-  // Returns a word with exactly the bits of [a,b) set.
-  // Requires a < b <= kNBits.
-  static INLINE uintptr_t RangeMask(uintptr_t a, uintptr_t b) {
-    DCHECK(a < b);
-    DCHECK(b <= kNBits);
-    const uintptr_t n_bits_in_mask = b - a;
-    // The full-width range is handled separately because shifting by the
-    // width of the type is undefined.
-    if (n_bits_in_mask == kNBits) {
-      return ~static_cast<uintptr_t>(0);
-    }
-    return ((kOne << n_bits_in_mask) - 1) << a;
-  }
-
-  uintptr_t m_;  // the bits.
-};
-
-// -------- BitSet -------------------{{{1
-// Poor man's sparse bit set.
-// The bits are grouped into "lines" of Mask::kNBits consecutive positions;
-// map_ maps the first position of a line to the Mask holding that line's bits.
-class BitSet {
- public:
-  // Add range [a,b). The range should be within one line (kNBitsLog).
-  // A range that crosses a line boundary is reported via Printf and ignored.
-  void Add(uintptr_t a, uintptr_t b) {
-    uintptr_t line = a & ~(Mask::kNBits - 1);
-    DCHECK(a < b);
-    DCHECK(a - line < Mask::kNBits);
-    if (!(b - line <= Mask::kNBits)) {
-      // The arguments are uintptr_t; cast them to what %p and %ld expect.
-      Printf("XXXXX %p %p %p b-line=%ld size=%ld a-line=%ld\n",
-             (void*)a, (void*)b, (void*)line,
-             (long)(b - line), (long)(b - a), (long)(a - line));
-      return;
-    }
-    DCHECK(b - line <= Mask::kNBits);
-    DCHECK(line == ((b - 1) & ~(Mask::kNBits - 1)));
-    Mask &mask= map_[line];
-    mask.SetRange(a - line, b - line);
-  }
-
-  // True if no line has been touched yet.
-  bool empty() { return map_.empty(); }
-
-  // Total number of set bits, summed over all lines.
-  size_t size() {
-    size_t res = 0;
-    for (Map::iterator it = map_.begin(); it != map_.end(); ++it) {
-      res += it->second.PopCount();
-    }
-    return res;
-  }
-
-  // Human-readable dump: the line and bit counts, then for every line the
-  // half-open ranges [first,last) of set positions, printed in hex.
-  string ToString() {
-    char buff[100];
-    string res;
-    int lines = 0;
-    snprintf(buff, sizeof(buff), " %ld lines %ld bits:",
-             (long)map_.size(), (long)size());
-    res += buff;
-    for (Map::iterator it = map_.begin(); it != map_.end(); ++it) {
-      Mask mask = it->second;
-      snprintf(buff, sizeof(buff), " l%d (%ld):", lines++, (long)mask.PopCount());
-      res += buff;
-      uintptr_t line = it->first;
-      bool is_in = false;  // true while we are inside a run of set bits.
-      for (size_t i = 0; i < Mask::kNBits; i++) {
-        uintptr_t addr = line + i;
-        if (mask.Get(i)) {
-          if (!is_in) {
-            snprintf(buff, sizeof(buff), " [%lx,", (long)addr);
-            res += buff;
-            is_in = true;
-          }
-        } else {
-          if (is_in) {
-            snprintf(buff, sizeof(buff), "%lx);", (long)addr);
-            res += buff;
-            is_in = false;
-          }
-        }
-      }
-      // Close a run that extends to the end of the line.
-      if (is_in) {
-        snprintf(buff, sizeof(buff), "%lx);", (long)(line + Mask::kNBits));
-        res += buff;
-      }
-    }
-    return res;
-  }
-
-  // Removes all bits.
-  void Clear() { map_.clear(); }
- private:
-  typedef map<uintptr_t, Mask> Map;
-  Map map_;  // first position of a line -> bits of that line.
-};
-
-// -------- Segment -------------------{{{1
-// Per-SID record holding the owning thread (tid_), a reference to a VTS
-// (vts_), two lock set ids (lsid_[0] for reads, lsid_[1] for writes), the lock
-// era at setup time and a reference count.
-//
-// All records live in one preallocated array (all_segments_) indexed by the
-// raw SID. Index 0 is never handed out: n_segments_ starts at 1.
-// Optional "history" stack traces are kept separately in all_stacks_.
-class Segment {
- public:
-  // for debugging...
-  // Returns true for SIDs whose life-cycle events should be printed.
-  static bool ProfileSeg(SID sid) {
-    // return (sid.raw() % (1 << 14)) == 0;
-    return false;
-  }
-
-  // non-static methods
-
-  VTS *vts() const { return vts_; }
-  TID tid() const { return TID(tid_); }
-  // Lock set id: the write one if is_w is true, otherwise the read one.
-  LSID  lsid(bool is_w) const { return lsid_[is_w]; }
-  uint32_t lock_era() const { return lock_era_; }
-
-  // static methods
-
-  // Returns the slot (kSizeOfHistoryStackTrace words) reserved for the stack
-  // trace of `sid`. The chunk holding the slot must already be allocated.
-  static INLINE uintptr_t *embedded_stack_trace(SID sid) {
-    DCHECK(sid.valid());
-    DCHECK(kSizeOfHistoryStackTrace > 0);
-    size_t chunk_idx = (unsigned)sid.raw() / kChunkSizeForStacks;
-    size_t idx       = (unsigned)sid.raw() % kChunkSizeForStacks;
-    DCHECK(chunk_idx < n_stack_chunks_);
-    DCHECK(all_stacks_[chunk_idx] != NULL);
-    return &all_stacks_[chunk_idx][idx * kSizeOfHistoryStackTrace];
-  }
-
-  // Makes sure the chunk that holds the stack trace of `sid` exists; also
-  // allocates every missing chunk with a smaller index.
-  static void ensure_space_for_stack_trace(SID sid) {
-    ScopedMallocCostCenter malloc_cc(__FUNCTION__);
-    DCHECK(sid.valid());
-    DCHECK(kSizeOfHistoryStackTrace > 0);
-    size_t chunk_idx = (unsigned)sid.raw() / kChunkSizeForStacks;
-    DCHECK(chunk_idx < n_stack_chunks_);
-    if (all_stacks_[chunk_idx])
-      return;
-    for (size_t i = 0; i <= chunk_idx; i++) {
-      if (all_stacks_[i]) continue;
-      all_stacks_[i] = new uintptr_t[
-          kChunkSizeForStacks * kSizeOfHistoryStackTrace];
-      // We don't clear this memory, it will be cleared later lazily.
-      // We also never delete it because it will be used until the very end.
-    }
-  }
-
-  // Renders the stored stack trace of `sid` as text.
-  static string StackTraceString(SID sid) {
-    DCHECK(kSizeOfHistoryStackTrace > 0);
-    return StackTrace::EmbeddedStackTraceToString(
-        embedded_stack_trace(sid), kSizeOfHistoryStackTrace);
-  }
-
-  // Allocate `n` fresh segments, put SIDs into `fresh_sids`.
-  // Recycled SIDs are used first; the rest are taken from the unused tail of
-  // all_segments_.
-  static INLINE void AllocateFreshSegments(size_t n, SID *fresh_sids) {
-    ScopedMallocCostCenter malloc_cc(__FUNCTION__);
-    size_t i = 0;
-    size_t n_reusable = min(n, reusable_sids_->size());
-    // First, allocate from reusable_sids_.
-    for (; i < n_reusable; i++) {
-      G_stats->seg_reuse++;
-      DCHECK(!reusable_sids_->empty());
-      SID sid = reusable_sids_->back();
-      reusable_sids_->pop_back();
-      Segment *seg = GetInternal(sid);
-      DCHECK(!seg->seg_ref_count_);
-      DCHECK(!seg->vts());
-      DCHECK(!seg->tid().valid());
-      CHECK(sid.valid());
-      if (ProfileSeg(sid)) {
-        Printf("Segment: reused SID %d\n", sid.raw());
-      }
-      fresh_sids[i] = sid;
-    }
-    // allocate the rest from new sids.
-    for (; i < n; i++) {
-      G_stats->seg_create++;
-      CHECK(n_segments_ < kMaxSID);
-      Segment *seg = GetSegmentByIndex(n_segments_);
-
-      // This VTS may not be empty due to ForgetAllState().
-      VTS::Unref(seg->vts_);
-      seg->vts_ = 0;
-      seg->seg_ref_count_ = 0;
-
-      if (ProfileSeg(SID(n_segments_))) {
-        Printf("Segment: allocated SID %d\n", n_segments_);
-      }
-
-      SID sid = fresh_sids[i] = SID(n_segments_);
-      if (kSizeOfHistoryStackTrace > 0) {
-        ensure_space_for_stack_trace(sid);
-      }
-      n_segments_++;
-    }
-  }
-
-  // Initialize the contents of the given segment.
-  // The segment must have a zero reference count. The VTS pointer is stored
-  // as is (no reference is taken here).
-  static INLINE void SetupFreshSid(SID sid, TID tid, VTS *vts,
-                                   LSID rd_lockset, LSID wr_lockset) {
-    DCHECK(vts);
-    DCHECK(tid.valid());
-    DCHECK(sid.valid());
-    Segment *seg = GetInternal(sid);
-    DCHECK(seg);
-    DCHECK(seg->seg_ref_count_ == 0);
-    seg->seg_ref_count_ = 0;
-    seg->tid_ = tid;
-    seg->lsid_[0] = rd_lockset;
-    seg->lsid_[1] = wr_lockset;
-    seg->vts_ = vts;
-    seg->lock_era_ = g_lock_era;
-    if (kSizeOfHistoryStackTrace) {
-      // Mark the stored stack trace as empty.
-      embedded_stack_trace(sid)[0] = 0;
-    }
-  }
-
-  // Allocates one segment, initializes it and returns its SID.
-  static INLINE SID AddNewSegment(TID tid, VTS *vts,
-                           LSID rd_lockset, LSID wr_lockset) {
-    ScopedMallocCostCenter malloc_cc("Segment::AddNewSegment()");
-    SID sid;
-    AllocateFreshSegments(1, &sid);
-    SetupFreshSid(sid, tid, vts, rd_lockset, wr_lockset);
-    return sid;
-  }
-
-  // A segment is alive while it has a VTS.
-  static bool Alive(SID sid) {
-    Segment *seg = GetInternal(sid);
-    return seg->vts() != NULL;
-  }
-
-  // Debug-only sanity check; `line` identifies the caller in the diagnostics.
-  static void AssertLive(SID sid, int line) {
-    if (DEBUG_MODE) {
-      if (!(sid.raw() < INTERNAL_ANNOTATE_UNPROTECTED_READ(n_segments_))) {
-        Printf("Segment::AssertLive: failed on sid=%d n_segments = %d line=%d\n",
-               sid.raw(), n_segments_, line);
-      }
-      Segment *seg = GetInternal(sid);
-      if (!seg->vts()) {
-        Printf("Segment::AssertLive: failed on sid=%d line=%d\n",
-               sid.raw(), line);
-      }
-      DCHECK(seg->vts());
-      DCHECK(seg->tid().valid());
-    }
-  }
-
-  // Returns the record of a live segment.
-  static INLINE Segment *Get(SID sid) {
-    AssertLive(sid, __LINE__);
-    Segment *res = GetInternal(sid);
-    DCHECK(res->vts());
-    DCHECK(res->tid().valid());
-    return res;
-  }
-
-  // Puts `sid` on the reuse list without touching the VTS reference count.
-  static INLINE void RecycleOneFreshSid(SID sid) {
-    Segment *seg = GetInternal(sid);
-    seg->tid_ = TID();
-    seg->vts_ = NULL;
-    reusable_sids_->push_back(sid);
-    if (ProfileSeg(sid)) {
-      Printf("Segment: recycled SID %d\n", sid.raw());
-    }
-  }
-
-  // Releases the VTS of `sid` and puts the SID on the reuse list.
-  // Returns false if the segment has been recycled already.
-  static bool RecycleOneSid(SID sid) {
-    ScopedMallocCostCenter malloc_cc("Segment::RecycleOneSid()");
-    Segment *seg = GetInternal(sid);
-    DCHECK(seg->seg_ref_count_ == 0);
-    DCHECK(sid.raw() < n_segments_);
-    if (!seg->vts()) return false;  // Already recycled.
-    VTS::Unref(seg->vts_);
-    RecycleOneFreshSid(sid);
-    return true;
-  }
-
-  int32_t ref_count() const {
-    return INTERNAL_ANNOTATE_UNPROTECTED_READ(seg_ref_count_);
-  }
-
-  // Increments the reference count; `where` is only used for profiling output.
-  static void INLINE Ref(SID sid, const char *where) {
-    Segment *seg = GetInternal(sid);
-    if (ProfileSeg(sid)) {
-      Printf("SegRef   : %d ref=%d %s; tid=%d\n", sid.raw(),
-             seg->seg_ref_count_, where, seg->tid().raw());
-    }
-    DCHECK(seg->seg_ref_count_ >= 0);
-    AtomicIncrementRefcount(&seg->seg_ref_count_);
-  }
-
-  // Decrements the reference count and returns whatever
-  // AtomicDecrementRefcount() returns (Unref() below treats 0 as "no
-  // references left"). Never recycles the segment.
-  static INLINE intptr_t UnrefNoRecycle(SID sid, const char *where) {
-    Segment *seg = GetInternal(sid);
-    if (ProfileSeg(sid)) {
-      Printf("SegUnref : %d ref=%d %s\n", sid.raw(), seg->seg_ref_count_, where);
-    }
-    DCHECK(seg->seg_ref_count_ > 0);
-    return AtomicDecrementRefcount(&seg->seg_ref_count_);
-  }
-
-  // Decrements the reference count and recycles the segment when it hits 0.
-  static void INLINE Unref(SID sid, const char *where) {
-    if (UnrefNoRecycle(sid, where) == 0) {
-      RecycleOneSid(sid);
-    }
-  }
-
-
-  // Forgets all segments. The records themselves are not touched here.
-  static void ForgetAllState() {
-    n_segments_ = 1;
-    reusable_sids_->clear();
-    // vts_'es will be freed in AddNewSegment (see AllocateFreshSegments).
-  }
-
-  // "T<tid>/S<sid>".
-  static string ToString(SID sid) {
-    char buff[100];
-    snprintf(buff, sizeof(buff), "T%d/S%d", Get(sid)->tid().raw(), sid.raw());
-    return buff;
-  }
-
-  // "T<tid>".
-  static string ToStringTidOnly(SID sid) {
-    char buff[100];
-    snprintf(buff, sizeof(buff), "T%d", Get(sid)->tid().raw());
-    return buff;
-  }
-
-  // "T<tid>/S<sid> " followed by the read and write lock sets.
-  static string ToStringWithLocks(SID sid) {
-    char buff[100];
-    Segment *seg = Get(sid);
-    snprintf(buff, sizeof(buff), "T%d/S%d ", seg->tid().raw(), sid.raw());
-    string res = buff;
-    res += TwoLockSetsToString(seg->lsid(false), seg->lsid(true));
-    return res;
-  }
-
-  // True if `a` and `b` are the same segment, belong to the same thread,
-  // or `a` happens-before `b`.
-  static bool INLINE HappensBeforeOrSameThread(SID a, SID b) {
-    if (a == b) return true;
-    if (Get(a)->tid() == Get(b)->tid()) return true;
-    return HappensBefore(a, b);
-  }
-
-  // Happens-before test on the VTSes of two different segments that belong to
-  // different threads.
-  static bool INLINE HappensBefore(SID a, SID b) {
-    DCHECK(a != b);
-    G_stats->n_seg_hb++;
-    bool res = false;
-    const Segment *seg_a = Get(a);
-    const Segment *seg_b = Get(b);
-    DCHECK(seg_a->tid() != seg_b->tid());
-    const VTS *vts_a = seg_a->vts();
-    const VTS *vts_b = seg_b->vts();
-    res = VTS::HappensBeforeCached(vts_a, vts_b);
-#if 0
-    if (DEBUG_MODE) {
-      Printf("HB = %d\n  %s\n  %s\n", res,
-           vts_a->ToString().c_str(), vts_b->ToString().c_str());
-    }
-#endif
-    return res;
-  }
-
-  // One past the largest SID handed out so far.
-  static int32_t NumberOfSegments() { return n_segments_; }
-
-  // Prints the number of segments and a histogram of reference counts
-  // (counts above 10 are lumped into the bucket 10).
-  static void ShowSegmentStats() {
-    Printf("Segment::ShowSegmentStats:\n");
-    Printf("n_segments_: %d\n", n_segments_);
-    // size() returns size_t; cast it to match %ld.
-    Printf("reusable_sids_: %ld\n", (long)reusable_sids_->size());
-    map<int, int> ref_to_freq_map;
-    for (int i = 1; i < n_segments_; i++) {
-      Segment *seg = GetInternal(SID(i));
-      int32_t refcount = seg->seg_ref_count_;
-      if (refcount > 10) refcount = 10;
-      ref_to_freq_map[refcount]++;
-    }
-    for (map<int, int>::iterator it = ref_to_freq_map.begin();
-         it != ref_to_freq_map.end(); ++it) {
-      Printf("ref %d => freq %d\n", it->first, it->second);
-    }
-  }
-
-  // One-time set-up of the static storage. Must run before any other method.
-  static void InitClassMembers() {
-    if (G_flags->keep_history == 0)
-      kSizeOfHistoryStackTrace = 0;
-    // The arguments below are size_t or have the type of kMaxSID; cast them
-    // so that they match the %ld conversions on every platform.
-    Report("INFO: Allocating %ldMb (%ld * %ldM) for Segments.\n",
-           (long)((sizeof(Segment) * kMaxSID) >> 20),
-           (long)sizeof(Segment), (long)(kMaxSID >> 20));
-    if (kSizeOfHistoryStackTrace) {
-      Report("INFO: Will allocate up to %ldMb for 'previous' stack traces.\n",
-             (long)((kSizeOfHistoryStackTrace * sizeof(uintptr_t) * kMaxSID)
-                    >> 20));
-    }
-
-    all_segments_  = new Segment[kMaxSID];
-    // initialization all segments to 0.
-    memset(all_segments_, 0, kMaxSID * sizeof(Segment));
-    // initialize all_segments_[0] with garbage
-    memset(all_segments_, -1, sizeof(Segment));
-
-    if (kSizeOfHistoryStackTrace > 0) {
-      // Number of chunks needed to cover kMaxSID stacks, rounded up.
-      n_stack_chunks_ = kMaxSID / kChunkSizeForStacks;
-      if (n_stack_chunks_ * kChunkSizeForStacks < (size_t)kMaxSID)
-        n_stack_chunks_++;
-      all_stacks_ = new uintptr_t*[n_stack_chunks_];
-      memset(all_stacks_, 0, sizeof(uintptr_t*) * n_stack_chunks_);
-    }
-    n_segments_    = 1;
-    reusable_sids_ = new vector<SID>;
-  }
-
- private:
-  // Unchecked access by array index.
-  static INLINE Segment *GetSegmentByIndex(int32_t index) {
-    return &all_segments_[index];
-  }
-  // Access by SID; unlike Get() it does not require the segment to be alive.
-  static INLINE Segment *GetInternal(SID sid) {
-    DCHECK(sid.valid());
-    DCHECK(sid.raw() < INTERNAL_ANNOTATE_UNPROTECTED_READ(n_segments_));
-    Segment *res = GetSegmentByIndex(sid.raw());
-    return res;
-  }
-
-  // Data members.
-  int32_t seg_ref_count_;
-  LSID     lsid_[2];   // [0] - read lock set, [1] - write lock set.
-  TID      tid_;
-  uint32_t lock_era_;  // value of g_lock_era at SetupFreshSid() time.
-  VTS *vts_;           // NULL means the segment is not alive.
-
-  // static class members.
-
-  // One large array of segments. The size is set by a command line (--max-sid)
-  // and never changes. Once we are out of vacant segments, we flush the state.
-  static Segment *all_segments_;
-  // We store stack traces separately because their size is unknown
-  // at compile time and because they are needed less often.
-  // The stacks are stored as an array of chunks, instead of one array,
-  // so that for small tests we do not require too much RAM.
-  // We don't use vector<> or another resizable array to avoid expensive
-  // resizing.
-  enum { kChunkSizeForStacks = DEBUG_MODE ? 512 : 1 * 1024 * 1024 };
-  static uintptr_t **all_stacks_;
-  static size_t      n_stack_chunks_;
-
-  static int32_t n_segments_;
-  static vector<SID> *reusable_sids_;
-};
-
-Segment          *Segment::all_segments_;  // allocated in InitClassMembers().
-uintptr_t       **Segment::all_stacks_;  // chunk table, see InitClassMembers().
-size_t            Segment::n_stack_chunks_;  // length of all_stacks_.
-int32_t           Segment::n_segments_;  // InitClassMembers() sets it to 1.
-vector<SID>      *Segment::reusable_sids_;  // allocated in InitClassMembers().
-
-// -------- SegmentSet -------------- {{{1
-// A set of segments identified by an SSID.
-// An SSID is either empty, a singleton (it then carries the SID itself and
-// has no SegmentSet object), or a tuple. A tuple SSID is negative and refers
-// to the object (*vec_)[-ssid-1], which stores between 2 and
-// kMaxSegmentSetSize SIDs in sids_, padded with zeros.
-// map_ finds the SSID of an existing tuple by its contents.
-class SegmentSet {
- public:
-  static NOINLINE SSID AddSegmentToSS(SSID old_ssid, SID new_sid);
-  static NOINLINE SSID RemoveSegmentFromSS(SSID old_ssid, SID sid_to_remove);
-
-  static INLINE SSID AddSegmentToTupleSS(SSID ssid, SID new_sid);
-  static INLINE SSID RemoveSegmentFromTupleSS(SSID old_ssid, SID sid_to_remove);
-
-  // Looks up the SSID of this set in map_; the set must be registered there.
-  SSID ComputeSSID() {
-    SSID res = map_->GetIdOrZero(this);
-    CHECK_NE(res.raw(), 0);
-    return res;
-  }
-
-  int ref_count() const { return ref_count_; }
-
-  // Debug-only sanity check; `line` identifies the caller in the diagnostics.
-  static void AssertLive(SSID ssid, int line) {
-    DCHECK(ssid.valid());
-    if (DEBUG_MODE) {
-      if (ssid.IsSingleton()) {
-        Segment::AssertLive(ssid.GetSingleton(), line);
-      } else {
-        DCHECK(ssid.IsTuple());
-        int idx = -ssid.raw()-1;
-        DCHECK(idx < static_cast<int>(vec_->size()));
-        DCHECK(idx >= 0);
-        SegmentSet *res = (*vec_)[idx];
-        // Report a missing set before the pointer is used; otherwise the
-        // diagnostic below can never be printed.
-        if (!res) {
-          Printf("SegmentSet::AssertLive failed at line %d (ssid=%d)\n",
-                 line, ssid.raw());
-          DCHECK(0);
-        }
-        DCHECK(res);
-        DCHECK(res->ref_count_ >= 0);
-        res->Validate(line);
-      }
-    }
-  }
-
-  // Returns the object behind a tuple SSID.
-  static SegmentSet *Get(SSID ssid) {
-    DCHECK(ssid.valid());
-    DCHECK(!ssid.IsSingleton());
-    int idx = -ssid.raw()-1;
-    ANNOTATE_IGNORE_READS_BEGIN();
-    DCHECK(idx < static_cast<int>(vec_->size()) && idx >= 0);
-    ANNOTATE_IGNORE_READS_END();
-    SegmentSet *res = (*vec_)[idx];
-    DCHECK(res);
-    DCHECK(res->size() >= 2);
-    return res;
-  }
-
-  // Drops the references to the member segments, unregisters this set from
-  // map_ and queues `ssid` for reuse. The reference count must be zero.
-  void RecycleOneSegmentSet(SSID ssid) {
-    DCHECK(ref_count_ == 0);
-    DCHECK(ssid.valid());
-    DCHECK(!ssid.IsSingleton());
-    int idx = -ssid.raw()-1;
-    DCHECK(idx < static_cast<int>(vec_->size()) && idx >= 0);
-    CHECK((*vec_)[idx] == this);
-    // Printf("SegmentSet::RecycleOneSegmentSet: %d\n", ssid.raw());
-    //
-    // Recycle segments
-    for (int i = 0; i < kMaxSegmentSetSize; i++) {
-      SID sid = this->GetSID(i);
-      if (sid.raw() == 0) break;
-      Segment::Unref(sid, "SegmentSet::Recycle");
-    }
-    ref_count_ = -1;  // Marks the object as recycled (see AllocateAndCopy).
-
-    map_->Erase(this);
-    ready_to_be_reused_->push_back(ssid);
-    G_stats->ss_recycle++;
-  }
-
-  // Takes a reference to the set (for a singleton - to its segment).
-  static void INLINE Ref(SSID ssid, const char *where) {
-    AssertTILHeld(); // The reference counting logic below is not thread-safe
-    DCHECK(ssid.valid());
-    if (ssid.IsSingleton()) {
-      Segment::Ref(ssid.GetSingleton(), where);
-    } else {
-      SegmentSet *sset = Get(ssid);
-      // Printf("SSRef   : %d ref=%d %s\n", ssid.raw(), sset->ref_count_, where);
-      DCHECK(sset->ref_count_ >= 0);
-      sset->ref_count_++;
-    }
-  }
-
-  // Releases a reference to the set (for a singleton - to its segment).
-  static void INLINE Unref(SSID ssid, const char *where) {
-    AssertTILHeld(); // The reference counting logic below is not thread-safe
-    DCHECK(ssid.valid());
-    if (ssid.IsSingleton()) {
-      Segment::Unref(ssid.GetSingleton(), where);
-    } else {
-      SegmentSet *sset = Get(ssid);
-      // Printf("SSUnref : %d ref=%d %s\n", ssid.raw(), sset->ref_count_, where);
-      DCHECK(sset->ref_count_ > 0);
-      sset->ref_count_--;
-      if (sset->ref_count_ == 0) {
-        // We don't delete unused SSID straightaway due to performance reasons
-        // (to avoid flushing caches too often and because SSID may be reused
-        // again soon)
-        //
-        // Instead, we use two queues (deques):
-        //    ready_to_be_recycled_ and ready_to_be_reused_.
-        // The algorithm is following:
-        // 1) When refcount_ becomes zero, we push the SSID into
-        //    ready_to_be_recycled_.
-        // 2) When ready_to_be_recycled_ becomes too large, we call
-        //    FlushRecycleQueue().
-        //    In FlushRecycleQueue(), we pop the first half of
-        //    ready_to_be_recycled_ and for each popped SSID we do
-        //     * if "refcount_ > 0", do nothing (this SSID is in use again)
-        //     * otherwise, we recycle this SSID (delete its VTS, etc) and push
-        //       it into ready_to_be_reused_
-        // 3) When a new SegmentSet is about to be created, we re-use SSID from
-        //    ready_to_be_reused_ (if available)
-        ready_to_be_recycled_->push_back(ssid);
-        if (UNLIKELY(ready_to_be_recycled_->size() >
-                     2 * G_flags->segment_set_recycle_queue_size)) {
-          FlushRecycleQueue();
-        }
-      }
-    }
-  }
-
-  // Shrinks ready_to_be_recycled_ down to segment_set_recycle_queue_size
-  // entries, recycling every popped set that is still unreferenced.
-  static void FlushRecycleQueue() {
-    while (ready_to_be_recycled_->size() >
-        G_flags->segment_set_recycle_queue_size) {
-      SSID rec_ssid = ready_to_be_recycled_->front();
-      ready_to_be_recycled_->pop_front();
-      int idx = -rec_ssid.raw()-1;
-      SegmentSet *rec_ss = (*vec_)[idx];
-      DCHECK(rec_ss);
-      DCHECK(rec_ss == Get(rec_ssid));
-      // We should check that this SSID hasn't been referenced again.
-      if (rec_ss->ref_count_ == 0) {
-        rec_ss->RecycleOneSegmentSet(rec_ssid);
-      }
-    }
-
-    // SSIDs will be reused soon - need to flush some caches.
-    FlushCaches();
-  }
-
-  string ToString() const;
-  void Print() {
-    Printf("SS%d:%s\n", -ComputeSSID().raw(), ToString().c_str());
-  }
-
-  // Text form of any SSID: "{}" for the empty set, "{T<tid>}" for a
-  // singleton, ToString() of the object for a tuple.
-  static string ToString(SSID ssid) {
-    CHECK(ssid.IsValidOrEmpty());
-    if (ssid.IsSingleton()) {
-      return "{" +  Segment::ToStringTidOnly(SID(ssid.raw())) + "}";
-    } else if (ssid.IsEmpty()) {
-      return "{}";
-    } else {
-      AssertLive(ssid, __LINE__);
-      return Get(ssid)->ToString();
-    }
-  }
-
-
-  static string ToStringWithLocks(SSID ssid);
-
-  // Drops the memoized results of AddSegmentToSS / RemoveSegmentFromSS.
-  static void FlushCaches() {
-    add_segment_cache_->Flush();
-    remove_segment_cache_->Flush();
-  }
-
-  // Deletes all SegmentSet objects and empties every container and cache.
-  static void ForgetAllState() {
-    for (size_t i = 0; i < vec_->size(); i++) {
-      delete (*vec_)[i];
-    }
-    map_->Clear();
-    vec_->clear();
-    ready_to_be_reused_->clear();
-    ready_to_be_recycled_->clear();
-    FlushCaches();
-  }
-
-
-  static void Test();
-
-  // Number of segments in the set denoted by `ssid`.
-  static int32_t Size(SSID ssid) {
-    if (ssid.IsEmpty()) return 0;
-    if (ssid.IsSingleton()) return 1;
-    return Get(ssid)->size();
-  }
-
-  // Returns the i-th SID; slot i may only be accessed if slot i-1 is in use.
-  SID GetSID(int32_t i) const {
-    DCHECK(i >= 0 && i < kMaxSegmentSetSize);
-    DCHECK(i == 0 || sids_[i-1].raw() != 0);
-    return sids_[i];
-  }
-
-  // Stores `sid` into slot i; slot i may only be set if slot i-1 is in use.
-  void SetSID(int32_t i, SID sid) {
-    DCHECK(i >= 0 && i < kMaxSegmentSetSize);
-    DCHECK(i == 0 || sids_[i-1].raw() != 0);
-    sids_[i] = sid;
-  }
-
-  // Returns the i-th SID of a singleton or tuple SSID and checks that the
-  // segment is alive; `line` identifies the caller in the diagnostics.
-  static SID GetSID(SSID ssid, int32_t i, int line) {
-    DCHECK(ssid.valid());
-    if (ssid.IsSingleton()) {
-      DCHECK(i == 0);
-      Segment::AssertLive(ssid.GetSingleton(), line);
-      return ssid.GetSingleton();
-    } else {
-      AssertLive(ssid, __LINE__);
-      SID sid = Get(ssid)->GetSID(i);
-      Segment::AssertLive(sid, line);
-      return sid;
-    }
-  }
-
-  // True if the set denoted by `ssid` contains `seg`.
-  static bool INLINE Contains(SSID ssid, SID seg) {
-    if (LIKELY(ssid.IsSingleton())) {
-      return ssid.GetSingleton() == seg;
-    } else if (LIKELY(ssid.IsEmpty())) {
-      return false;
-    }
-
-    SegmentSet *ss = Get(ssid);
-    for (int i = 0; i < kMaxSegmentSetSize; i++) {
-      SID sid = ss->GetSID(i);
-      if (sid.raw() == 0) break;
-      if (sid == seg)
-        return true;
-    }
-    return false;
-  }
-
-  static Segment *GetSegmentForNonSingleton(SSID ssid, int32_t i, int line) {
-    return Segment::Get(GetSID(ssid, i, line));
-  }
-
-  void NOINLINE Validate(int line) const;
-
-  // Number of SegmentSet objects created so far (recycled ones included).
-  static size_t NumberOfSegmentSets() { return vec_->size(); }
-
-
-  // One-time set-up of the static storage. Must run before any other method.
-  static void InitClassMembers() {
-    map_    = new Map;
-    vec_    = new vector<SegmentSet *>;
-    ready_to_be_recycled_ = new deque<SSID>;
-    ready_to_be_reused_ = new deque<SSID>;
-    add_segment_cache_ = new SsidSidToSidCache;
-    remove_segment_cache_ = new SsidSidToSidCache;
-  }
-
- private:
-  SegmentSet()  // Private CTOR
-    : ref_count_(0) {
-    // sids_ are filled with zeroes due to SID default CTOR.
-    if (DEBUG_MODE) {
-      for (int i = 0; i < kMaxSegmentSetSize; i++)
-        CHECK_EQ(sids_[i].raw(), 0);
-    }
-  }
-
-  // Number of SIDs stored; a SegmentSet object never holds fewer than two.
-  int size() const {
-    for (int i = 0; i < kMaxSegmentSetSize; i++) {
-      if (sids_[i].raw() == 0) {
-        CHECK_GE(i, 2);
-        return i;
-      }
-    }
-    return kMaxSegmentSetSize;
-  }
-
-  // Creates (or reuses) a SegmentSet object with the same SIDs as `ss`, takes
-  // a reference to every member segment, registers the object in map_ and
-  // returns its SSID. The new set starts with a zero reference count.
-  static INLINE SSID AllocateAndCopy(SegmentSet *ss) {
-    DCHECK(ss->ref_count_ == 0);
-    DCHECK(sizeof(int32_t) == sizeof(SID));
-    SSID res_ssid;
-    SegmentSet *res_ss = 0;
-
-    if (!ready_to_be_reused_->empty()) {
-      res_ssid = ready_to_be_reused_->front();
-      ready_to_be_reused_->pop_front();
-      int idx = -res_ssid.raw()-1;
-      res_ss = (*vec_)[idx];
-      DCHECK(res_ss);
-      DCHECK(res_ss->ref_count_ == -1);
-      G_stats->ss_reuse++;
-      for (int i = 0; i < kMaxSegmentSetSize; i++) {
-        res_ss->sids_[i] = SID(0);
-      }
-    } else {
-      // create a new one
-      ScopedMallocCostCenter cc("SegmentSet::CreateNewSegmentSet");
-      G_stats->ss_create++;
-      res_ss = new SegmentSet;
-      vec_->push_back(res_ss);
-      // The object at index k of vec_ gets the SSID -(k+1).
-      res_ssid = SSID(-((int32_t)vec_->size()));
-      CHECK(res_ssid.valid());
-    }
-    DCHECK(res_ss);
-    res_ss->ref_count_ = 0;
-    for (int i = 0; i < kMaxSegmentSetSize; i++) {
-      SID sid = ss->GetSID(i);
-      if (sid.raw() == 0) break;
-      Segment::Ref(sid, "SegmentSet::FindExistingOrAlocateAndCopy");
-      res_ss->SetSID(i, sid);
-    }
-    DCHECK(res_ss == Get(res_ssid));
-    map_->Insert(res_ss, res_ssid);
-    return res_ssid;
-  }
-
-  // Returns the SSID of a registered set equal to `ss`, creating one if
-  // there is none.
-  static NOINLINE SSID FindExistingOrAlocateAndCopy(SegmentSet *ss) {
-    if (DEBUG_MODE) {
-      int size = ss->size();
-      if (size == 2) G_stats->ss_size_2++;
-      if (size == 3) G_stats->ss_size_3++;
-      if (size == 4) G_stats->ss_size_4++;
-      if (size > 4) G_stats->ss_size_other++;
-    }
-
-    // First, check if there is such set already.
-    SSID ssid = map_->GetIdOrZero(ss);
-    if (ssid.raw() != 0) {  // Found.
-      AssertLive(ssid, __LINE__);
-      G_stats->ss_find++;
-      return ssid;
-    }
-    // If no such set, create one.
-    return AllocateAndCopy(ss);
-  }
-
-  // SSID of the two-element set {sid1, sid2}, stored in this order.
-  static INLINE SSID DoubletonSSID(SID sid1, SID sid2) {
-    SegmentSet tmp;
-    tmp.SetSID(0, sid1);
-    tmp.SetSID(1, sid2);
-    return FindExistingOrAlocateAndCopy(&tmp);
-  }
-
-  // testing only
-  static SegmentSet *AddSegmentToTupleSS(SegmentSet *ss, SID new_sid) {
-    SSID ssid = AddSegmentToTupleSS(ss->ComputeSSID(), new_sid);
-    AssertLive(ssid, __LINE__);
-    return Get(ssid);
-  }
-
-  // testing only (object-returning flavour of DoubletonSSID).
-  static SegmentSet *Doubleton(SID sid1, SID sid2) {
-    SSID ssid = DoubletonSSID(sid1, sid2);
-    AssertLive(ssid, __LINE__);
-    return Get(ssid);
-  }
-
-  // Functors used by the containers below; all of them look only at sids_.
-
-  // Lexicographical "less" over the SID arrays.
-  struct Less {
-    INLINE bool operator() (const SegmentSet *ss1,
-                            const SegmentSet *ss2) const {
-      for (int i = 0; i < kMaxSegmentSetSize; i++) {
-        SID sid1 = ss1->sids_[i],
-            sid2 = ss2->sids_[i];
-        if (sid1 != sid2) return sid1 < sid2;
-      }
-      return false;
-    }
-  };
-
-  // Element-wise equality of the SID arrays.
-  struct SSEq {
-    INLINE bool operator() (const SegmentSet *ss1,
-                            const SegmentSet *ss2) const {
-      G_stats->sseq_calls++;
-
-      for (int i = 0; i < kMaxSegmentSetSize; i++) {
-        SID sid1 = ss1->sids_[i],
-            sid2 = ss2->sids_[i];
-        if (sid1 != sid2) return false;
-      }
-      return true;
-    }
-  };
-
-  // Hash of the SID array.
-  struct SSHash {
-    INLINE size_t operator() (const SegmentSet *ss) const {
-      uintptr_t res = 0;
-      uint32_t* sids_array = (uint32_t*)ss->sids_;
-      // We must have even number of SIDs.
-      DCHECK((kMaxSegmentSetSize % 2) == 0);
-
-      G_stats->sshash_calls++;
-      // xor all SIDs together, half of them bswap-ed.
-      for (int i = 0; i < kMaxSegmentSetSize; i += 2) {
-        uintptr_t t1 = sids_array[i];
-        uintptr_t t2 = sids_array[i+1];
-        if (t2) t2 = tsan_bswap(t2);
-        res = res ^ t1 ^ t2;
-      }
-      return res;
-    }
-  };
-
-  // Hash-and-compare traits for stdext::hash_map (used with MSVC only).
-  struct SSTraits {
-    enum {
-      // These values are taken from the hash_compare defaults.
-      bucket_size = 4,  // Must be greater than zero.
-      min_buckets = 8,  // Must be power of 2.
-    };
-
-    INLINE size_t operator()(const SegmentSet *ss) const {
-      SSHash sshash;
-      return sshash(ss);
-    }
-
-    INLINE bool operator()(const SegmentSet *ss1, const SegmentSet *ss2) const {
-      Less less;
-      return less(ss1, ss2);
-    }
-  };
-
-  // Returns the SSID stored for `ss` in `map`, or SSID(0) if there is none.
-  template <class MapType>
-  static SSID GetIdOrZeroFromMap(MapType *map, SegmentSet *ss) {
-    typename MapType::iterator it = map->find(ss);
-    if (it == map->end())
-      return SSID(0);
-    return it->second;
-  }
-
-  // Contents -> SSID index of all registered (not recycled) sets.
-  class Map {
-   public:
-    SSID GetIdOrZero(SegmentSet *ss) {
-      return GetIdOrZeroFromMap(&map_, ss);
-    }
-
-    void Insert(SegmentSet *ss, SSID id) {
-      map_[ss] = id;
-    }
-
-    // The set must be present.
-    void Erase(SegmentSet *ss) {
-      CHECK(map_.erase(ss));
-    }
-
-    void Clear() {
-      map_.clear();
-    }
-
-   private:
-    // TODO(timurrrr): consider making a custom hash_table.
-#if defined(_MSC_VER)
-    typedef stdext::hash_map<SegmentSet*, SSID, SSTraits > MapType__;
-#elif 1
-    typedef unordered_map<SegmentSet*, SSID, SSHash, SSEq > MapType__;
-#else
-    // Old code, may be useful for debugging.
-    typedef map<SegmentSet*, SSID, Less > MapType__;
-#endif
-    MapType__ map_;
-  };
-
-//  typedef map<SegmentSet*, SSID, Less> Map;
-
-  // static data members
-  static Map                  *map_;
-  // TODO(kcc): use vector<SegmentSet> instead.
-  static vector<SegmentSet *> *vec_;
-  static deque<SSID>         *ready_to_be_reused_;
-  static deque<SSID>         *ready_to_be_recycled_;
-
-  // Memoization of (SSID, SID) -> SSID for adding and removing a segment.
-  typedef PairCache<SSID, SID, SSID, 1009, 1> SsidSidToSidCache;
-  static SsidSidToSidCache    *add_segment_cache_;
-  static SsidSidToSidCache    *remove_segment_cache_;
-
-  // sids_ contains up to kMaxSegmentSetSize SIDs.
-  // Contains zeros at the end if size < kMaxSegmentSetSize.
-  SID     sids_[kMaxSegmentSetSize];
-  int32_t ref_count_;  // -1 while the object is recycled and not yet reused.
-};
-
-SegmentSet::Map      *SegmentSet::map_;  // allocated in InitClassMembers().
-vector<SegmentSet *> *SegmentSet::vec_;  // allocated in InitClassMembers().
-deque<SSID>         *SegmentSet::ready_to_be_reused_;  // allocated in InitClassMembers().
-deque<SSID>         *SegmentSet::ready_to_be_recycled_;  // allocated in InitClassMembers().
-SegmentSet::SsidSidToSidCache    *SegmentSet::add_segment_cache_;  // allocated in InitClassMembers().
-SegmentSet::SsidSidToSidCache    *SegmentSet::remove_segment_cache_;  // allocated in InitClassMembers().
-
-
-
-
-// Computes the SSID that results from removing "sid_to_remove" from "old_ssid":
-//  - an empty set stays as it is;
-//  - a singleton becomes empty if its segment happens-before "sid_to_remove"
-//    or belongs to the same thread, otherwise it is returned unchanged;
-//  - a tuple is handled by RemoveSegmentFromTupleSS().
-// The answers are memoized in remove_segment_cache_.
-SSID SegmentSet::RemoveSegmentFromSS(SSID old_ssid, SID sid_to_remove) {
-  DCHECK(old_ssid.IsValidOrEmpty());
-  DCHECK(sid_to_remove.valid());
-
-  // Fast path: this (set, segment) pair has been answered before.
-  SSID cached;
-  if (remove_segment_cache_->Lookup(old_ssid, sid_to_remove, &cached)) {
-    return cached;
-  }
-
-  SSID result = old_ssid;  // By default nothing is removed.
-  if (!old_ssid.IsEmpty()) {
-    if (LIKELY(old_ssid.IsSingleton())) {
-      const SID only_sid = old_ssid.GetSingleton();
-      if (Segment::HappensBeforeOrSameThread(only_sid, sid_to_remove)) {
-        result = SSID(0);  // Empty.
-      }
-    } else {
-      result = RemoveSegmentFromTupleSS(old_ssid, sid_to_remove);
-    }
-  }
-
-  remove_segment_cache_->Insert(old_ssid, sid_to_remove, result);
-  return result;
-}
-
-
-// static
-//
-// Returns the SSID of a SegmentSet containing "new_sid" and all those
-// segments from "old_ssid" which do not happen-before "new_sid".
-//
-// For details, see
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerAlgorithm#State_machine
-SSID SegmentSet::AddSegmentToSS(SSID old_ssid, SID new_sid) {
-  DCHECK(old_ssid.raw() == 0 || old_ssid.valid());
-  DCHECK(new_sid.valid());
-  Segment::AssertLive(new_sid, __LINE__);
-  SSID res;
-
-  // These two TIDs will only be used if old_ssid.IsSingleton() == true.
-  TID old_tid;
-  TID new_tid;
-
-  if (LIKELY(old_ssid.IsSingleton())) {
-    SID old_sid(old_ssid.raw());  // A singleton SSID carries the SID itself.
-    DCHECK(old_sid.valid());
-    Segment::AssertLive(old_sid, __LINE__);
-
-    if (UNLIKELY(old_sid == new_sid)) {
-      // The new segment equals the old one - nothing has changed.
-      return old_ssid;
-    }
-
-    old_tid = Segment::Get(old_sid)->tid();
-    new_tid = Segment::Get(new_sid)->tid();
-    if (LIKELY(old_tid == new_tid)) {
-      // The new segment is in the same thread - just replace the SID.
-      return SSID(new_sid);
-    }
-
-    if (Segment::HappensBefore(old_sid, new_sid)) {
-      // The new segment is in another thread, but the old segment
-      // happens before the new one - just replace the SID.
-      return SSID(new_sid);
-    }
-
-    DCHECK(!Segment::HappensBefore(new_sid, old_sid));
-    // The only other case is the Singleton->Doubleton transition, see below.
-  } else if (LIKELY(old_ssid.IsEmpty())) {  // Empty set: the result is {new_sid}.
-    return SSID(new_sid);
-  }
-
-  // Look up the cache. None of the fast paths above reads or fills it.
-  if (add_segment_cache_->Lookup(old_ssid, new_sid, &res)) {
-    SegmentSet::AssertLive(res, __LINE__);
-    return res;
-  }
-
-  if (LIKELY(old_ssid.IsSingleton())) {
-    // Singleton->Doubleton transition.
-    // These two TIDs were initialized before cache lookup (see above).
-    DCHECK(old_tid.valid());
-    DCHECK(new_tid.valid());
-
-    SID old_sid(old_ssid.raw());
-    DCHECK(old_sid.valid());
-
-    DCHECK(!Segment::HappensBefore(new_sid, old_sid));
-    DCHECK(!Segment::HappensBefore(old_sid, new_sid));
-    res = (old_tid < new_tid  // The segment of the smaller TID goes first.
-      ? DoubletonSSID(old_sid, new_sid)
-      : DoubletonSSID(new_sid, old_sid));
-    SegmentSet::AssertLive(res, __LINE__);
-  } else {
-    res = AddSegmentToTupleSS(old_ssid, new_sid);  // old_ssid is a tuple here.
-    SegmentSet::AssertLive(res, __LINE__);
-  }
-
-  // Put the result into cache.
-  add_segment_cache_->Insert(old_ssid, new_sid, res);
-
-  return res;
-}
-
-SSID SegmentSet::RemoveSegmentFromTupleSS(SSID ssid, SID sid_to_remove) {
-  DCHECK(ssid.IsTuple());
-  DCHECK(ssid.valid());
-  AssertLive(ssid, __LINE__);
-  SegmentSet *ss = Get(ssid);
-
-  int32_t old_size = 0, new_size = 0;
-  SegmentSet tmp;
-  SID * tmp_sids = tmp.sids_;
-  CHECK(sizeof(int32_t) == sizeof(SID));
-
-  for (int i = 0; i < kMaxSegmentSetSize; i++, old_size++) {
-    SID sid = ss->GetSID(i);
-    if (sid.raw() == 0) break;
-    DCHECK(sid.valid());
-    Segment::AssertLive(sid, __LINE__);
-    if (Segment::HappensBeforeOrSameThread(sid, sid_to_remove))
-      continue;  // Skip this segment from the result.
-    tmp_sids[new_size++] = sid;
-  }
-
-  if (new_size == old_size) return ssid;
-  if (new_size == 0) return SSID(0);
-  if (new_size == 1) return SSID(tmp_sids[0]);
-
-  if (DEBUG_MODE) tmp.Validate(__LINE__);
-
-  SSID res = FindExistingOrAlocateAndCopy(&tmp);
-  if (DEBUG_MODE) Get(res)->Validate(__LINE__);
-  return res;
-}
-
-//  static
-SSID SegmentSet::AddSegmentToTupleSS(SSID ssid, SID new_sid) {
-  DCHECK(ssid.IsTuple());
-  DCHECK(ssid.valid());
-  AssertLive(ssid, __LINE__);
-  SegmentSet *ss = Get(ssid);
-
-  Segment::AssertLive(new_sid, __LINE__);
-  const Segment *new_seg = Segment::Get(new_sid);
-  TID            new_tid = new_seg->tid();
-
-  int32_t old_size = 0, new_size = 0;
-  SID tmp_sids[kMaxSegmentSetSize + 1];
-  CHECK(sizeof(int32_t) == sizeof(SID));
-  bool inserted_new_sid = false;
-  // traverse all SID in current ss. tids are ordered.
-  for (int i = 0; i < kMaxSegmentSetSize; i++, old_size++) {
-    SID sid = ss->GetSID(i);
-    if (sid.raw() == 0) break;
-    DCHECK(sid.valid());
-    Segment::AssertLive(sid, __LINE__);
-    const Segment *seg = Segment::Get(sid);
-    TID            tid = seg->tid();
-
-    if (sid == new_sid) {
-      // we are trying to insert a sid which is already there.
-      // SS will not change.
-      return ssid;
-    }
-
-    if (tid == new_tid) {
-      if (seg->vts() == new_seg->vts() &&
-          seg->lsid(true) == new_seg->lsid(true) &&
-          seg->lsid(false) == new_seg->lsid(false)) {
-        // Optimization: if a segment with the same VTS and LS
-        // as in the current is already inside SS, don't modify the SS.
-        // Improves performance with --keep-history >= 1.
-        return ssid;
-      }
-      // we have another segment from the same thread => replace it.
-      tmp_sids[new_size++] = new_sid;
-      inserted_new_sid = true;
-      continue;
-    }
-
-    if (tid > new_tid && !inserted_new_sid) {
-      // there was no segment with this tid, put it now.
-      tmp_sids[new_size++] = new_sid;
-      inserted_new_sid = true;
-    }
-
-    if (!Segment::HappensBefore(sid, new_sid)) {
-      DCHECK(!Segment::HappensBefore(new_sid, sid));
-      tmp_sids[new_size++] = sid;
-    }
-  }
-
-  if (!inserted_new_sid) {
-    tmp_sids[new_size++] = new_sid;
-  }
-
-  CHECK_GT(new_size, 0);
-  if (new_size == 1) {
-    return SSID(new_sid.raw());  // Singleton.
-  }
-
-  if (new_size > kMaxSegmentSetSize) {
-    CHECK(new_size == kMaxSegmentSetSize + 1);
-    // we need to forget one segment. Which? The oldest one.
-    int seg_to_forget = 0;
-    Segment *oldest_segment = NULL;
-    for (int i = 0; i < new_size; i++) {
-      SID sid = tmp_sids[i];
-      if (sid == new_sid) continue;
-      Segment *s = Segment::Get(tmp_sids[i]);
-      if (oldest_segment == NULL ||
-          oldest_segment->vts()->uniq_id() > s->vts()->uniq_id()) {
-        oldest_segment = s;
-        seg_to_forget = i;
-      }
-    }
-    DCHECK(oldest_segment);
-
-    // Printf("seg_to_forget: %d T%d\n", tmp_sids[seg_to_forget].raw(),
-    //        oldest_segment->tid().raw());
-    for (int i = seg_to_forget; i < new_size - 1; i++) {
-      tmp_sids[i] = tmp_sids[i+1];
-    }
-    new_size--;
-  }
-
-  CHECK(new_size <= kMaxSegmentSetSize);
-  SegmentSet tmp;
-  for (int i = 0; i < new_size; i++)
-    tmp.sids_[i] = tmp_sids[i];  // TODO(timurrrr): avoid copying?
-  if (DEBUG_MODE) tmp.Validate(__LINE__);
-
-  SSID res = FindExistingOrAlocateAndCopy(&tmp);
-  if (DEBUG_MODE) Get(res)->Validate(__LINE__);
-  return res;
-}
-
-
-
-void NOINLINE SegmentSet::Validate(int line) const {
-  // This is expensive!
-  int my_size = size();
-  for (int i = 0; i < my_size; i++) {
-    SID sid1 = GetSID(i);
-    CHECK(sid1.valid());
-    Segment::AssertLive(sid1, __LINE__);
-
-    for (int j = i + 1; j < my_size; j++) {
-      SID sid2 = GetSID(j);
-      CHECK(sid2.valid());
-      Segment::AssertLive(sid2, __LINE__);
-
-      bool hb1 = Segment::HappensBefore(sid1, sid2);
-      bool hb2 = Segment::HappensBefore(sid2, sid1);
-      if (hb1 || hb2) {
-        Printf("BAD at line %d: %d %d %s %s\n   %s\n   %s\n",
-               line, static_cast<int>(hb1), static_cast<int>(hb2),
-               Segment::ToString(sid1).c_str(),
-               Segment::ToString(sid2).c_str(),
-               Segment::Get(sid1)->vts()->ToString().c_str(),
-               Segment::Get(sid2)->vts()->ToString().c_str());
-      }
-      CHECK(!Segment::HappensBefore(GetSID(i), GetSID(j)));
-      CHECK(!Segment::HappensBefore(GetSID(j), GetSID(i)));
-      CHECK(Segment::Get(sid1)->tid() < Segment::Get(sid2)->tid());
-    }
-  }
-
-  for (int i = my_size; i < kMaxSegmentSetSize; i++) {
-    CHECK_EQ(sids_[i].raw(), 0);
-  }
-}
-
-string SegmentSet::ToStringWithLocks(SSID ssid) {
-  if (ssid.IsEmpty()) return "";
-  string res = "";
-  for (int i = 0; i < Size(ssid); i++) {
-    SID sid = GetSID(ssid, i, __LINE__);
-    if (i) res += ", ";
-    res += Segment::ToStringWithLocks(sid);
-  }
-  return res;
-}
-
-string SegmentSet::ToString() const {
-  Validate(__LINE__);
-  string res = "{";
-  for (int i = 0; i < size(); i++) {
-    SID sid = GetSID(i);
-    if (i) res += ", ";
-    CHECK(sid.valid());
-    Segment::AssertLive(sid, __LINE__);
-    res += Segment::ToStringTidOnly(sid).c_str();
-  }
-  res += "}";
-  return res;
-}
-
-// static
-void SegmentSet::Test() {
-  LSID ls(0);  // dummy
-  SID sid1 = Segment::AddNewSegment(TID(0), VTS::Parse("[0:2;]"), ls, ls);
-  SID sid2 = Segment::AddNewSegment(TID(1), VTS::Parse("[0:1; 1:1]"), ls, ls);
-  SID sid3 = Segment::AddNewSegment(TID(2), VTS::Parse("[0:1; 2:1]"), ls, ls);
-  SID sid4 = Segment::AddNewSegment(TID(3), VTS::Parse("[0:1; 3:1]"), ls, ls);
-  SID sid5 = Segment::AddNewSegment(TID(4), VTS::Parse("[0:3; 2:2; 3:2;]"),
-                                    ls, ls);
-  SID sid6 = Segment::AddNewSegment(TID(4), VTS::Parse("[0:3; 1:2; 2:2; 3:2;]"),
-                                    ls, ls);
-
-
-  // SS1:{T0/S1, T2/S3}
-  SegmentSet *d1 = SegmentSet::Doubleton(sid1, sid3);
-  d1->Print();
-  CHECK(SegmentSet::Doubleton(sid1, sid3) == d1);
-  // SS2:{T0/S1, T1/S2, T2/S3}
-  SegmentSet *d2 = SegmentSet::AddSegmentToTupleSS(d1, sid2);
-  CHECK(SegmentSet::AddSegmentToTupleSS(d1, sid2) == d2);
-  d2->Print();
-
-  // SS3:{T0/S1, T2/S3, T3/S4}
-  SegmentSet *d3 = SegmentSet::AddSegmentToTupleSS(d1, sid4);
-  CHECK(SegmentSet::AddSegmentToTupleSS(d1, sid4) == d3);
-  d3->Print();
-
-  // SS4:{T0/S1, T1/S2, T2/S3, T3/S4}
-  SegmentSet *d4 = SegmentSet::AddSegmentToTupleSS(d2, sid4);
-  CHECK(SegmentSet::AddSegmentToTupleSS(d2, sid4) == d4);
-  CHECK(SegmentSet::AddSegmentToTupleSS(d3, sid2) == d4);
-  d4->Print();
-
-  // SS5:{T1/S2, T4/S5}
-  SegmentSet *d5 = SegmentSet::AddSegmentToTupleSS(d4, sid5);
-  d5->Print();
-
-  SSID ssid6 = SegmentSet::AddSegmentToTupleSS(d4->ComputeSSID(), sid6);
-  CHECK(ssid6.IsSingleton());
-  Printf("%s\n", ToString(ssid6).c_str());
-  CHECK_EQ(sid6.raw(), 6);
-  CHECK_EQ(ssid6.raw(), 6);
-}
-
-// -------- Shadow Value ------------ {{{1
-class ShadowValue {
- public:
-  ShadowValue() {
-    if (DEBUG_MODE) {
-      rd_ssid_ = 0xDEADBEEF;
-      wr_ssid_ = 0xDEADBEEF;
-    }
-  }
-
-  void Clear() {
-    rd_ssid_ = 0;
-    wr_ssid_ = 0;
-  }
-
-  INLINE bool IsNew() const { return rd_ssid_ == 0 && wr_ssid_ == 0; }
-  // new experimental state machine.
-  SSID rd_ssid() const { return SSID(rd_ssid_); }
-  SSID wr_ssid() const { return SSID(wr_ssid_); }
-  INLINE void set(SSID rd_ssid, SSID wr_ssid) {
-    rd_ssid_ = rd_ssid.raw();
-    wr_ssid_ = wr_ssid.raw();
-  }
-
-  // comparison
-  INLINE bool operator == (const ShadowValue &sval) const {
-    return rd_ssid_ == sval.rd_ssid_ &&
-        wr_ssid_ == sval.wr_ssid_;
-  }
-  bool operator != (const ShadowValue &sval) const {
-    return !(*this == sval);
-  }
-  bool operator <  (const ShadowValue &sval) const {
-    if (rd_ssid_ < sval.rd_ssid_) return true;
-    if (rd_ssid_ == sval.rd_ssid_ && wr_ssid_ < sval.wr_ssid_) return true;
-    return false;
-  }
-
-  void Ref(const char *where) {
-    if (!rd_ssid().IsEmpty()) {
-      DCHECK(rd_ssid().valid());
-      SegmentSet::Ref(rd_ssid(), where);
-    }
-    if (!wr_ssid().IsEmpty()) {
-      DCHECK(wr_ssid().valid());
-      SegmentSet::Ref(wr_ssid(), where);
-    }
-  }
-
-  void Unref(const char *where) {
-    if (!rd_ssid().IsEmpty()) {
-      DCHECK(rd_ssid().valid());
-      SegmentSet::Unref(rd_ssid(), where);
-    }
-    if (!wr_ssid().IsEmpty()) {
-      DCHECK(wr_ssid().valid());
-      SegmentSet::Unref(wr_ssid(), where);
-    }
-  }
-
-  string ToString() const {
-    char buff[1000];
-    if (IsNew()) {
-      return "{New}";
-    }
-    snprintf(buff, sizeof(buff), "R: %s; W: %s",
-            SegmentSet::ToStringWithLocks(rd_ssid()).c_str(),
-            SegmentSet::ToStringWithLocks(wr_ssid()).c_str());
-    return buff;
-  }
-
- private:
-  int32_t rd_ssid_;
-  int32_t wr_ssid_;
-};
-
-// -------- CacheLine --------------- {{{1
-// The CacheLine is a set of Mask::kNBits (32 or 64) Shadow Values.
-// The shadow values in a cache line are grouped in subsets of 8 values.
-// If a particular address of memory is always accessed by aligned 8-byte
-// read/write instructions, only the shadow value correspoding to the
-// first byte is set, the rest shadow values are not used.
-// Ditto to aligned 4- and 2-byte accesses.
-// If a memory was accessed as 8 bytes and then it was accesed as 4 bytes,
-// (e.g. someone used a C union) we need to split the shadow value into two.
-// If the memory was accessed as 4 bytes and is now accessed as 8 bytes,
-// we need to try joining the shadow values.
-//
-// Hence the concept of granularity_mask (which is a string of 16 bits).
-// 0000000000000000 -- no accesses were observed to these 8 bytes.
-// 0000000000000001 -- all accesses were 8 bytes (aligned).
-// 0000000000000110 -- all accesses were 4 bytes (aligned).
-// 0000000001111000 -- all accesses were 2 bytes (aligned).
-// 0111111110000000 -- all accesses were 1 byte.
-// 0110000000100010 -- First 4 bytes were accessed by 4 byte insns,
-//   next 2 bytes by 2 byte insns, last 2 bytes by 1 byte insns.
-
-
-INLINE bool GranularityIs8(uintptr_t off, uint16_t gr) {
-  return gr & 1;
-}
-
-INLINE bool GranularityIs4(uintptr_t off, uint16_t gr) {
-  uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
-  return ((gr >> (1 + off_within_8_bytes)) & 1);
-}
-
-INLINE bool GranularityIs2(uintptr_t off, uint16_t gr) {
-  uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
-  return ((gr >> (3 + off_within_8_bytes)) & 1);
-}
-
-INLINE bool GranularityIs1(uintptr_t off, uint16_t gr) {
-  uintptr_t off_within_8_bytes = (off) & 7;       // 0, ..., 7
-  return ((gr >> (7 + off_within_8_bytes)) & 1);
-}
-
-class CacheLine {
- public:
-  static const uintptr_t kLineSizeBits = Mask::kNBitsLog;  // Don't change this.
-  static const uintptr_t kLineSize = Mask::kNBits;
-
-  static CacheLine *CreateNewCacheLine(uintptr_t tag) {
-    ScopedMallocCostCenter cc("CreateNewCacheLine");
-    void *mem = free_list_->Allocate();
-    DCHECK(mem);
-    return new (mem) CacheLine(tag);
-  }
-
-  static void Delete(CacheLine *line) {
-    free_list_->Deallocate(line);
-  }
-
-  const Mask &has_shadow_value() const { return has_shadow_value_;  }
-  Mask &traced() { return traced_; }
-  Mask &published() { return published_; }
-  Mask &racey()  { return racey_; }
-  uintptr_t tag() { return tag_; }
-
-  void DebugTrace(uintptr_t off, const char *where_str, int where_int) {
-    (void)off;
-    (void)where_str;
-    (void)where_int;
-#if 0
-    if (DEBUG_MODE && tag() == G_flags->trace_addr) {
-      uintptr_t off8 = off & ~7;
-      Printf("CacheLine %p, off=%ld off8=%ld gr=%d "
-             "has_sval: %d%d%d%d%d%d%d%d (%s:%d)\n",
-             tag(), off, off8,
-             granularity_[off/8],
-             has_shadow_value_.Get(off8 + 0),
-             has_shadow_value_.Get(off8 + 1),
-             has_shadow_value_.Get(off8 + 2),
-             has_shadow_value_.Get(off8 + 3),
-             has_shadow_value_.Get(off8 + 4),
-             has_shadow_value_.Get(off8 + 5),
-             has_shadow_value_.Get(off8 + 6),
-             has_shadow_value_.Get(off8 + 7),
-             where_str, where_int
-             );
-    }
-#endif
-  }
-
-  // Add a new shadow value to a place where there was no shadow value before.
-  ShadowValue *AddNewSvalAtOffset(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    CHECK(!has_shadow_value().Get(off));
-    has_shadow_value_.Set(off);
-    published_.Clear(off);
-    ShadowValue *res = GetValuePointer(off);
-    res->Clear();
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    return res;
-  }
-
-  // Return true if this line has no useful information in it.
-  bool Empty() {
-    // The line has shadow values.
-    if (!has_shadow_value().Empty()) return false;
-    // If the line is traced, racey or published, we want to keep it.
-    if (!traced().Empty()) return false;
-    if (!racey().Empty()) return false;
-    if (!published().Empty()) return false;
-    return true;
-  }
-
-  INLINE Mask ClearRangeAndReturnOldUsed(uintptr_t from, uintptr_t to) {
-    traced_.ClearRange(from, to);
-    published_.ClearRange(from, to);
-    racey_.ClearRange(from, to);
-    for (uintptr_t x = (from + 7) / 8; x < to / 8; x++) {
-      granularity_[x] = 0;
-    }
-    return has_shadow_value_.ClearRangeAndReturnOld(from, to);
-  }
-
-  void Clear() {
-    has_shadow_value_.Clear();
-    traced_.Clear();
-    published_.Clear();
-    racey_.Clear();
-    for (size_t i = 0; i < TS_ARRAY_SIZE(granularity_); i++)
-      granularity_[i] = 0;
-  }
-
-  ShadowValue *GetValuePointer(uintptr_t offset) {
-    DCHECK(offset < kLineSize);
-    return  &vals_[offset];
-  }
-  ShadowValue  GetValue(uintptr_t offset) { return *GetValuePointer(offset); }
-
-  static uintptr_t ComputeOffset(uintptr_t a) {
-    return a & (kLineSize - 1);
-  }
-  static uintptr_t ComputeTag(uintptr_t a) {
-    return a & ~(kLineSize - 1);
-  }
-  static uintptr_t ComputeNextTag(uintptr_t a) {
-    return ComputeTag(a) + kLineSize;
-  }
-
-  uint16_t *granularity_mask(uintptr_t off) {
-    DCHECK(off < kLineSize);
-    return &granularity_[off / 8];
-  }
-
-  void Split_8_to_4(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    uint16_t gr = *granularity_mask(off);
-    if (GranularityIs8(off, gr)) {
-      DCHECK(!GranularityIs4(off, gr));
-      DCHECK(!GranularityIs2(off, gr));
-      DCHECK(!GranularityIs1(off, gr));
-      uintptr_t off_8_aligned = off & ~7;
-      if (has_shadow_value_.Get(off_8_aligned)) {
-        ShadowValue sval = GetValue(off_8_aligned);
-        sval.Ref("Split_8_to_4");
-        DCHECK(!has_shadow_value_.Get(off_8_aligned + 4));
-        *AddNewSvalAtOffset(off_8_aligned + 4) = sval;
-      }
-      *granularity_mask(off) = gr = 3 << 1;
-      DCHECK(GranularityIs4(off, gr));
-      DebugTrace(off, __FUNCTION__, __LINE__);
-    }
-  }
-
-  void Split_4_to_2(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    uint16_t gr = *granularity_mask(off);
-    if (GranularityIs4(off, gr)) {
-      DCHECK(!GranularityIs8(off, gr));
-      DCHECK(!GranularityIs2(off, gr));
-      DCHECK(!GranularityIs1(off, gr));
-      uint16_t off_4_aligned = off & ~3;
-      if (has_shadow_value_.Get(off_4_aligned)) {
-        ShadowValue sval = GetValue(off_4_aligned);
-        sval.Ref("Split_4_to_2");
-        DCHECK(!has_shadow_value_.Get(off_4_aligned + 2));
-        *AddNewSvalAtOffset(off_4_aligned + 2) = sval;
-      }
-      // Clear this 4-granularity bit.
-      uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
-      gr &= ~(1 << (1 + off_within_8_bytes));
-      // Set two 2-granularity bits.
-      gr |= 3 << (3 + 2 * off_within_8_bytes);
-      *granularity_mask(off) = gr;
-      DebugTrace(off, __FUNCTION__, __LINE__);
-    }
-  }
-
-  void Split_2_to_1(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    uint16_t gr = *granularity_mask(off);
-    if (GranularityIs2(off, gr)) {
-      DCHECK(!GranularityIs8(off, gr));
-      DCHECK(!GranularityIs4(off, gr));
-      DCHECK(!GranularityIs1(off, gr));
-      uint16_t off_2_aligned = off & ~1;
-      if (has_shadow_value_.Get(off_2_aligned)) {
-        ShadowValue sval = GetValue(off_2_aligned);
-        sval.Ref("Split_2_to_1");
-        DCHECK(!has_shadow_value_.Get(off_2_aligned + 1));
-        *AddNewSvalAtOffset(off_2_aligned + 1) = sval;
-      }
-      // Clear this 2-granularity bit.
-      uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
-      gr &= ~(1 << (3 + off_within_8_bytes));
-      // Set two 1-granularity bits.
-      gr |= 3 << (7 + 2 * off_within_8_bytes);
-      *granularity_mask(off) = gr;
-      DebugTrace(off, __FUNCTION__, __LINE__);
-    }
-  }
-
-  void Join_1_to_2(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    DCHECK((off & 1) == 0);
-    uint16_t gr = *granularity_mask(off);
-    if (GranularityIs1(off, gr)) {
-      DCHECK(GranularityIs1(off + 1, gr));
-      if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 1)) {
-        if (GetValue(off) == GetValue(off + 1)) {
-          ShadowValue *sval_p = GetValuePointer(off + 1);
-          sval_p->Unref("Join_1_to_2");
-          sval_p->Clear();
-          has_shadow_value_.Clear(off + 1);
-          uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
-          // Clear two 1-granularity bits.
-          gr &= ~(3 << (7 + 2 * off_within_8_bytes));
-          // Set one 2-granularity bit.
-          gr |= 1 << (3 + off_within_8_bytes);
-          *granularity_mask(off) = gr;
-          DebugTrace(off, __FUNCTION__, __LINE__);
-        }
-      }
-    }
-  }
-
-  void Join_2_to_4(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    DCHECK((off & 3) == 0);
-    uint16_t gr = *granularity_mask(off);
-    if (GranularityIs2(off, gr) && GranularityIs2(off + 2, gr)) {
-      if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 2)) {
-        if (GetValue(off) == GetValue(off + 2)) {
-          ShadowValue *sval_p = GetValuePointer(off + 2);
-          sval_p->Unref("Join_2_to_4");
-          sval_p->Clear();
-          has_shadow_value_.Clear(off + 2);
-          uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
-          // Clear two 2-granularity bits.
-          gr &= ~(3 << (3 + 2 * off_within_8_bytes));
-          // Set one 4-granularity bit.
-          gr |= 1 << (1 + off_within_8_bytes);
-          *granularity_mask(off) = gr;
-          DebugTrace(off, __FUNCTION__, __LINE__);
-        }
-      }
-    }
-  }
-
-  void Join_4_to_8(uintptr_t off) {
-    DebugTrace(off, __FUNCTION__, __LINE__);
-    DCHECK((off & 7) == 0);
-    uint16_t gr = *granularity_mask(off);
-    if (GranularityIs4(off, gr) && GranularityIs4(off + 4, gr)) {
-      if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 4)) {
-        if (GetValue(off) == GetValue(off + 4)) {
-          ShadowValue *sval_p = GetValuePointer(off + 4);
-          sval_p->Unref("Join_4_to_8");
-          sval_p->Clear();
-          has_shadow_value_.Clear(off + 4);
-          *granularity_mask(off) = 1;
-          DebugTrace(off, __FUNCTION__, __LINE__);
-        }
-      }
-    }
-  }
-
-  static void InitClassMembers() {
-    if (DEBUG_MODE) {
-      Printf("sizeof(CacheLine) = %ld\n", sizeof(CacheLine));
-    }
-    free_list_ = new FreeList(sizeof(CacheLine), 1024);
-  }
-
- private:
-  explicit CacheLine(uintptr_t tag) {
-    tag_ = tag;
-    Clear();
-  }
-  ~CacheLine() { }
-
-  uintptr_t tag_;
-
-  // data members
-  Mask has_shadow_value_;
-  Mask traced_;
-  Mask racey_;
-  Mask published_;
-  uint16_t granularity_[kLineSize / 8];
-  ShadowValue vals_[kLineSize];
-
-  // static data members.
-  static FreeList *free_list_;
-};
-
-FreeList *CacheLine::free_list_;
-
-// If range [a,b) fits into one line, return that line's tag.
-// Else range [a,b) is broken into these ranges:
-//   [a, line1_tag)
-//   [line1_tag, line2_tag)
-//   [line2_tag, b)
-// and 0 is returned.
-uintptr_t GetCacheLinesForRange(uintptr_t a, uintptr_t b,
-                                uintptr_t *line1_tag, uintptr_t *line2_tag) {
-  uintptr_t a_tag = CacheLine::ComputeTag(a);
-  uintptr_t next_tag = CacheLine::ComputeNextTag(a);
-  if (b < next_tag) {
-    return a_tag;
-  }
-  *line1_tag = next_tag;
-  *line2_tag = CacheLine::ComputeTag(b);
-  return 0;
-}
-
-
-// -------- Cache ------------------ {{{1
-class Cache {
- public:
-  Cache() {
-    memset(lines_, 0, sizeof(lines_));
-    ANNOTATE_BENIGN_RACE_SIZED(lines_, sizeof(lines_),
-                               "Cache::lines_ accessed without a lock");
-  }
-
-  INLINE static CacheLine *kLineIsLocked() {
-    return (CacheLine*)1;
-  }
-
-  INLINE static bool LineIsNullOrLocked(CacheLine *line) {
-    return (uintptr_t)line <= 1;
-  }
-
-  INLINE CacheLine *TidMagic(int32_t tid) {
-    return kLineIsLocked();
-  }
-
-  // Try to get a CacheLine for exclusive use.
-  // May return NULL or kLineIsLocked.
-  INLINE CacheLine *TryAcquireLine(TSanThread *thr, uintptr_t a, int call_site) {
-    uintptr_t cli = ComputeCacheLineIndexInCache(a);
-    CacheLine **addr = &lines_[cli];
-    CacheLine *res = (CacheLine*)AtomicExchange(
-           (uintptr_t*)addr, (uintptr_t)kLineIsLocked());
-    if (DEBUG_MODE && debug_cache) {
-      uintptr_t tag = CacheLine::ComputeTag(a);
-      if (res && res != kLineIsLocked())
-        Printf("TryAcquire %p empty=%d tag=%lx cli=%lx site=%d\n",
-               res, res->Empty(), res->tag(), cli, call_site);
-      else
-        Printf("TryAcquire tag=%lx cli=%d site=%d\n", tag, cli, call_site);
-    }
-    if (res) {
-      ANNOTATE_HAPPENS_AFTER((void*)cli);
-    }
-    return res;
-  }
-
-  INLINE CacheLine *AcquireLine(TSanThread *thr, uintptr_t a, int call_site) {
-    CacheLine *line = NULL;
-    int iter = 0;
-    const int max_iter = 1 << 30;
-    for (;;) {
-      line = TryAcquireLine(thr, a, call_site);
-      if (line != kLineIsLocked())
-        break;
-      iter++;
-      if ((iter % (1 << 6)) == 0) {
-        YIELD();
-        G_stats->try_acquire_line_spin++;
-        if (DEBUG_MODE && debug_cache && ((iter & (iter - 1)) == 0)) {
-          Printf("T%d %s a=%p iter=%d\n", raw_tid(thr), __FUNCTION__, a, iter);
-        }
-      } else {
-        for (int active_spin = 0; active_spin != 10; active_spin += 1) {
-          PROCESSOR_YIELD();
-        }
-      }
-      if (DEBUG_MODE && debug_cache && iter == max_iter) {
-        Printf("Failed to acquire a cache line: T%d a=%p site=%d\n",
-               raw_tid(thr), a, call_site);
-        CHECK(iter < max_iter);
-      }
-    }
-    DCHECK(lines_[ComputeCacheLineIndexInCache(a)] == TidMagic(raw_tid(thr)));
-    return line;
-  }
-
-  // Release a CacheLine from exclusive use.
-  INLINE void ReleaseLine(TSanThread *thr, uintptr_t a, CacheLine *line, int call_site) {
-    if (TS_SERIALIZED) return;
-    DCHECK(line != kLineIsLocked());
-    uintptr_t cli = ComputeCacheLineIndexInCache(a);
-    DCHECK(line == NULL ||
-           cli == ComputeCacheLineIndexInCache(line->tag()));
-    CacheLine **addr = &lines_[cli];
-    DCHECK(*addr == TidMagic(raw_tid(thr)));
-    ReleaseStore((uintptr_t*)addr, (uintptr_t)line);
-    ANNOTATE_HAPPENS_BEFORE((void*)cli);
-    if (DEBUG_MODE && debug_cache) {
-      uintptr_t tag = CacheLine::ComputeTag(a);
-      if (line)
-        Printf("Release %p empty=%d tag=%lx cli=%lx site=%d\n",
-               line, line->Empty(), line->tag(), cli, call_site);
-      else
-        Printf("Release tag=%lx cli=%d site=%d\n", tag, cli, call_site);
-    }
-  }
-
-  void AcquireAllLines(TSanThread *thr) {
-    CHECK(TS_SERIALIZED == 0);
-    for (size_t i = 0; i < (size_t)kNumLines; i++) {
-      uintptr_t tag = i << CacheLine::kLineSizeBits;
-      AcquireLine(thr, tag, __LINE__);
-      CHECK(lines_[i] == kLineIsLocked());
-    }
-  }
-
-  // Get a CacheLine. This operation should be performed under a lock
-  // (whatever that is), but other threads may be acquiring the same line
-  // concurrently w/o a lock.
-  // Every call to GetLine() which returns non-null line
-  // should be followed by a call to ReleaseLine().
-  INLINE CacheLine *GetLine(TSanThread *thr, uintptr_t a, bool create_new_if_need, int call_site) {
-    uintptr_t tag = CacheLine::ComputeTag(a);
-    DCHECK(tag <= a);
-    DCHECK(tag + CacheLine::kLineSize > a);
-    uintptr_t cli = ComputeCacheLineIndexInCache(a);
-    CacheLine *res = NULL;
-    CacheLine *line = NULL;
-
-    if (create_new_if_need == false && lines_[cli] == 0) {
-      // There is no such line in the cache, nor should it be in the storage.
-      // Check that the storage indeed does not have this line.
-      // Such DCHECK is racey if tsan is multi-threaded.
-      DCHECK(TS_SERIALIZED == 0 || storage_.count(tag) == 0);
-      return NULL;
-    }
-
-    if (TS_SERIALIZED) {
-      line = lines_[cli];
-    } else {
-      line = AcquireLine(thr, tag, call_site);
-    }
-
-
-    if (LIKELY(line && line->tag() == tag)) {
-      res = line;
-    } else {
-      res = WriteBackAndFetch(thr, line, tag, cli, create_new_if_need);
-      if (!res) {
-        ReleaseLine(thr, a, line, call_site);
-      }
-    }
-    if (DEBUG_MODE && debug_cache) {
-      if (res)
-        Printf("GetLine %p empty=%d tag=%lx\n", res, res->Empty(), res->tag());
-      else
-        Printf("GetLine res=NULL, line=%p tag=%lx cli=%lx\n", line, tag, cli);
-    }
-    return res;
-  }
-
-  INLINE CacheLine *GetLineOrCreateNew(TSanThread *thr, uintptr_t a, int call_site) {
-    return GetLine(thr, a, true, call_site);
-  }
-  INLINE CacheLine *GetLineIfExists(TSanThread *thr, uintptr_t a, int call_site) {
-    return GetLine(thr, a, false, call_site);
-  }
-
-  void ForgetAllState(TSanThread *thr) {
-    for (int i = 0; i < kNumLines; i++) {
-      if (TS_SERIALIZED == 0) CHECK(LineIsNullOrLocked(lines_[i]));
-      lines_[i] = NULL;
-    }
-    map<uintptr_t, Mask> racey_masks;
-    for (Map::iterator i = storage_.begin(); i != storage_.end(); ++i) {
-      CacheLine *line = i->second;
-      if (!line->racey().Empty()) {
-        racey_masks[line->tag()] = line->racey();
-      }
-      CacheLine::Delete(line);
-    }
-    storage_.clear();
-    // Restore the racey masks.
-    for (map<uintptr_t, Mask>::iterator it = racey_masks.begin();
-         it != racey_masks.end(); it++) {
-      CacheLine *line = GetLineOrCreateNew(thr, it->first, __LINE__);
-      line->racey() = it->second;
-      DCHECK(!line->racey().Empty());
-      ReleaseLine(thr, line->tag(), line, __LINE__);
-    }
-  }
-
-  void PrintStorageStats() {
-    if (!G_flags->show_stats) return;
-    set<ShadowValue> all_svals;
-    map<size_t, int> sizes;
-    for (Map::iterator it = storage_.begin(); it != storage_.end(); ++it) {
-      CacheLine *line = it->second;
-      // uintptr_t cli = ComputeCacheLineIndexInCache(line->tag());
-      //if (lines_[cli] == line) {
-        // this line is in cache -- ignore it.
-      //  continue;
-      //}
-      set<ShadowValue> s;
-      for (uintptr_t i = 0; i < CacheLine::kLineSize; i++) {
-        if (line->has_shadow_value().Get(i)) {
-          ShadowValue sval = *(line->GetValuePointer(i));
-          s.insert(sval);
-          all_svals.insert(sval);
-        }
-      }
-      size_t size = s.size();
-      if (size > 10) size = 10;
-      sizes[size]++;
-    }
-    Printf("Storage sizes: %ld\n", storage_.size());
-    for (size_t size = 0; size <= CacheLine::kLineSize; size++) {
-      if (sizes[size]) {
-        Printf("  %ld => %d\n", size, sizes[size]);
-      }
-    }
-    Printf("Different svals: %ld\n", all_svals.size());
-    set <SSID> all_ssids;
-    for (set<ShadowValue>::iterator it = all_svals.begin(); it != all_svals.end(); ++it) {
-      ShadowValue sval = *it;
-      for (int i = 0; i < 2; i++) {
-        SSID ssid = i ? sval.rd_ssid() : sval.wr_ssid();
-        all_ssids.insert(ssid);
-      }
-    }
-    Printf("Different ssids: %ld\n", all_ssids.size());
-    set <SID> all_sids;
-    for (set<SSID>::iterator it = all_ssids.begin(); it != all_ssids.end(); ++it) {
-      int size = SegmentSet::Size(*it);
-      for (int i = 0; i < size; i++) {
-        SID sid = SegmentSet::GetSID(*it, i, __LINE__);
-        all_sids.insert(sid);
-      }
-    }
-    Printf("Different sids: %ld\n", all_sids.size());
-    for (int i = 1; i < Segment::NumberOfSegments(); i++) {
-      if (Segment::ProfileSeg(SID(i)) && all_sids.count(SID(i)) == 0) {
-        // Printf("Segment SID %d: missing in storage; ref=%d\n", i,
-        // Segment::Get(SID(i))->ref_count());
-      }
-    }
-  }
-
- private:
-  INLINE uintptr_t ComputeCacheLineIndexInCache(uintptr_t addr) {
-    return (addr >> CacheLine::kLineSizeBits) & (kNumLines - 1);
-  }
-
-  // Slow path of a cache lookup: finds (or creates) the line for 'tag' in
-  // storage_, installs it into slot 'cli' and disposes of 'old_line', the
-  // line that previously occupied that slot.
-  //
-  //   thr                - calling thread; only used for the non-serialized
-  //                        sanity check on lines_[cli].
-  //   old_line           - previous occupant of the slot, may be NULL.
-  //   tag                - tag of the line being fetched.
-  //   cli                - index into lines_.
-  //   create_new_if_need - if false and 'tag' is not in storage_, nothing is
-  //                        created and NULL is returned.
-  //
-  // Returns the line for 'tag', or NULL in the case described above.
-  NOINLINE CacheLine *WriteBackAndFetch(TSanThread *thr, CacheLine *old_line,
-                                        uintptr_t tag, uintptr_t cli,
-                                        bool create_new_if_need) {
-    ScopedMallocCostCenter cc("Cache::WriteBackAndFetch");
-    CacheLine *res;
-    size_t old_storage_size = storage_.size();
-    (void)old_storage_size;
-    CacheLine **line_for_this_tag = NULL;
-    if (create_new_if_need) {
-      // operator[] inserts a NULL entry when 'tag' is not yet present.
-      line_for_this_tag = &storage_[tag];
-    } else {
-      Map::iterator it = storage_.find(tag);
-      if (it == storage_.end()) {
-        if (DEBUG_MODE && debug_cache) {
-          Printf("WriteBackAndFetch: old_line=%ld tag=%lx cli=%ld\n",
-                 old_line, tag, cli);
-        }
-        return NULL;
-      }
-      line_for_this_tag = &(it->second);
-    }
-    CHECK(line_for_this_tag);
-    DCHECK(old_line != kLineIsLocked());
-    if (*line_for_this_tag == NULL) {
-      // creating a new cache line
-      // A NULL entry is expected only when it was inserted just above, so the
-      // map must have grown by exactly one element.
-      CHECK(storage_.size() == old_storage_size + 1);
-      res = CacheLine::CreateNewCacheLine(tag);
-      if (DEBUG_MODE && debug_cache) {
-        Printf("%s %d new line %p cli=%lx\n", __FUNCTION__, __LINE__, res, cli);
-      }
-      *line_for_this_tag = res;
-      G_stats->cache_new_line++;
-    } else {
-      // taking an existing cache line from storage.
-      res = *line_for_this_tag;
-      if (DEBUG_MODE && debug_cache) {
-        Printf("%s %d exi line %p tag=%lx old=%p empty=%d cli=%lx\n",
-             __FUNCTION__, __LINE__, res, res->tag(), old_line,
-             res->Empty(), cli);
-      }
-      DCHECK(!res->Empty());
-      G_stats->cache_fetch++;
-    }
-
-    if (TS_SERIALIZED) {
-      lines_[cli] = res;
-    } else {
-      // In the non-serialized build the slot is not written here; it is
-      // expected to still hold this thread's marker value.
-      DCHECK(lines_[cli] == TidMagic(raw_tid(thr)));
-    }
-
-    if (old_line) {
-      if (DEBUG_MODE && debug_cache) {
-        Printf("%s %d old line %p empty=%d\n", __FUNCTION__, __LINE__,
-               old_line, old_line->Empty());
-      }
-      if (old_line->Empty()) {
-        // An empty line carries no information: drop it from storage_ and
-        // free it instead of keeping it around.
-        storage_.erase(old_line->tag());
-        CacheLine::Delete(old_line);
-        G_stats->cache_delete_empty_line++;
-      } else {
-        if (debug_cache) {
-          DebugOnlyCheckCacheLineWhichWeReplace(old_line, res);
-        }
-      }
-    }
-    DCHECK(res->tag() == tag);
-
-    // Track the high-water mark of storage_ for the statistics output.
-    if (G_stats->cache_max_storage_size < storage_.size()) {
-      G_stats->cache_max_storage_size = storage_.size();
-    }
-
-    return res;
-  }
-
-  // Debug aid: on the 1st, 1025th, 2049th, ... call, counts the distinct
-  // shadow values stored in 'old_line' and prints them together with the
-  // address range of 'new_line' and the cache statistics.
-  void DebugOnlyCheckCacheLineWhichWeReplace(CacheLine *old_line,
-                                             CacheLine *new_line) {
-    static int c = 0;
-    c++;
-    if ((c % 1024) != 1) return;
-
-    set<int64_t> distinct_values;
-    uintptr_t pos = 0;
-    while (pos < CacheLine::kLineSize) {
-      if (old_line->has_shadow_value().Get(pos)) {
-        int64_t *raw =
-            reinterpret_cast<int64_t*>(old_line->GetValuePointer(pos));
-        distinct_values.insert(*raw);
-      }
-      pos++;
-    }
-
-    Printf("\n[%d] Cache Size=%ld %s different values: %ld\n", c,
-           storage_.size(), old_line->has_shadow_value().ToString().c_str(),
-           distinct_values.size());
-    Printf("new line: %p %p\n", new_line->tag(),
-           new_line->tag() + CacheLine::kLineSize);
-    G_stats->PrintStatsForCache();
-  }
-
-  // Number of slots in lines_. It is a power of two (2^14 in debug mode,
-  // 2^21 otherwise) because ComputeCacheLineIndexInCache() masks with
-  // (kNumLines - 1).
-  static const int kNumLines = 1 << (DEBUG_MODE ? 14 : 21);
-  // Slot table indexed by ComputeCacheLineIndexInCache().
-  CacheLine *lines_[kNumLines];
-
-  // tag => CacheLine
-  // Holds the lines that WriteBackAndFetch() creates and looks up by tag.
-  typedef unordered_map<uintptr_t, CacheLine*> Map;
-  Map storage_;
-};
-
-static  Cache *G_cache;
-
-// -------- Published range -------------------- {{{1
-// One published sub-range of a single cache line.
-struct PublishInfo {
-  uintptr_t tag;   // Tag of the cache line where the mem is published.
-  Mask      mask;  // The bits that are actually published.
-  VTS      *vts;   // The point where this range has been published.
-};
-
-
-// Several PublishInfo records may exist for the same cache line, hence a
-// multimap.
-typedef multimap<uintptr_t, PublishInfo> PublishInfoMap;
-
-// Maps a cache line tag to the PublishInfo records of that line (entries are
-// inserted and looked up by tag).
-static PublishInfoMap *g_publish_info_map;
-
-// Set to non-zero to get verbose Printf tracing of publish operations.
-const int kDebugPublish = 0;
-
-// Looks up the VTS under which address 'a' was published.
-// Scans every PublishInfo registered for a's cache line and returns the VTS
-// of the first one whose mask covers a's offset; prints a message and
-// returns NULL if there is none.
-static const VTS *GetPublisherVTS(uintptr_t a) {
-  const uintptr_t line_tag = CacheLine::ComputeTag(a);
-  const uintptr_t byte_offset = CacheLine::ComputeOffset(a);
-
-  pair<PublishInfoMap::iterator, PublishInfoMap::iterator> candidates =
-      g_publish_info_map->equal_range(line_tag);
-  PublishInfoMap::iterator cur = candidates.first;
-  while (cur != candidates.second) {
-    PublishInfo &entry = cur->second;
-    DCHECK(entry.tag == line_tag);
-    if (entry.mask.Get(byte_offset)) {
-      G_stats->publish_get++;
-      return entry.vts;
-    }
-    ++cur;
-  }
-
-  Printf("GetPublisherVTS returned NULL: a=%p\n", a);
-  return NULL;
-}
-
-// Debug-only consistency check of all PublishInfo records for 'tag':
-// every record must carry the right tag, a VTS and a non-empty mask, and
-// the masks must be pairwise disjoint. 'line' is the caller's __LINE__ and
-// is only printed. Always returns true (failures abort via CHECK).
-static bool CheckSanityOfPublishedMemory(uintptr_t tag, int line) {
-  if (DEBUG_MODE) {
-    if (kDebugPublish) {
-      Printf("CheckSanityOfPublishedMemory: line=%d\n", line);
-    }
-    pair<PublishInfoMap::iterator, PublishInfoMap::iterator> eq_range =
-        g_publish_info_map->equal_range(tag);
-    // Accumulates the bits covered by the records visited so far.
-    Mask union_of_masks(0);
-    PublishInfoMap::iterator it = eq_range.first;
-    while (it != eq_range.second) {
-      PublishInfo &info = it->second;
-      CHECK(info.tag  == tag);
-      CHECK(it->first == tag);
-      CHECK(info.vts);
-      Mask mask(info.mask);
-      // A record with nothing published should have been erased.
-      CHECK(!mask.Empty());
-      // No byte may be published by two records at once.
-      CHECK(Mask::Intersection(union_of_masks, mask).Empty());
-      union_of_masks.Union(mask);
-      ++it;
-    }
-  }
-  return true;
-}
-
-// Clear the publish attribute for the bytes from 'line' that are set in 'mask'
-// Every PublishInfo of the line has 'mask' subtracted from its own mask;
-// records whose mask becomes empty are removed from g_publish_info_map and
-// their VTS is unreferenced.
-static void ClearPublishedAttribute(CacheLine *line, Mask mask) {
-  CHECK(CheckSanityOfPublishedMemory(line->tag(), __LINE__));
-  typedef PublishInfoMap::iterator Iter;
-  bool deleted_some = true;
-  if (kDebugPublish)
-    Printf(" ClearPublishedAttribute: %p %s\n",
-           line->tag(), mask.ToString().c_str());
-  // Erasing invalidates the iterator, so after each erase the scan is
-  // restarted from a fresh equal_range(). Records that survive are visited
-  // (and counted in publish_clear) again on every restart.
-  while (deleted_some) {
-    deleted_some = false;
-    pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(line->tag());
-    for (Iter it = eq_range.first; it != eq_range.second; ++it) {
-      PublishInfo &info = it->second;
-      DCHECK(info.tag == line->tag());
-      if (kDebugPublish)
-        Printf("?ClearPublishedAttribute: %p %s\n", line->tag(),
-               info.mask.ToString().c_str());
-      info.mask.Subtract(mask);
-      if (kDebugPublish)
-        Printf("+ClearPublishedAttribute: %p %s\n", line->tag(),
-               info.mask.ToString().c_str());
-      G_stats->publish_clear++;
-      if (info.mask.Empty()) {
-        // Nothing of this record is published any more: drop it.
-        VTS::Unref(info.vts);
-        g_publish_info_map->erase(it);
-        deleted_some = true;
-        break;
-      }
-    }
-  }
-  CHECK(CheckSanityOfPublishedMemory(line->tag(), __LINE__));
-}
-
-// Publish range [a, b) in addr's CacheLine with vts.
-// 'a' and 'b' are offsets inside the line (b <= CacheLine::kLineSize), not
-// absolute addresses. Any earlier publication of these bytes is cleared
-// first; then a new PublishInfo holding a clone of 'vts' is recorded.
-static void PublishRangeInOneLine(TSanThread *thr, uintptr_t addr, uintptr_t a,
-                                  uintptr_t b, VTS *vts) {
-  ScopedMallocCostCenter cc("PublishRangeInOneLine");
-  DCHECK(b <= CacheLine::kLineSize);
-  DCHECK(a < b);
-  uintptr_t tag = CacheLine::ComputeTag(addr);
-  CHECK(CheckSanityOfPublishedMemory(tag, __LINE__));
-  CacheLine *line = G_cache->GetLineOrCreateNew(thr, tag, __LINE__);
-
-  // The "1 ||" makes the clearing unconditional; the GetRange() test is
-  // never evaluated.
-  if (1 || line->published().GetRange(a, b)) {
-    Mask mask(0);
-    mask.SetRange(a, b);
-    // TODO(timurrrr): add warning for re-publishing.
-    ClearPublishedAttribute(line, mask);
-  }
-
-  line->published().SetRange(a, b);
-  G_cache->ReleaseLine(thr, tag, line, __LINE__);
-
-  // Record the publication; the map entry owns its own clone of the VTS
-  // (it is unreferenced in ClearPublishedAttribute()).
-  PublishInfo pub_info;
-  pub_info.tag  = tag;
-  pub_info.mask.SetRange(a, b);
-  pub_info.vts  = vts->Clone();
-  g_publish_info_map->insert(make_pair(tag, pub_info));
-  G_stats->publish_set++;
-  if (kDebugPublish)
-    Printf("PublishRange   : [%p,%p) %p %s vts=%p\n",
-           a, b, tag, pub_info.mask.ToString().c_str(), vts);
-  CHECK(CheckSanityOfPublishedMemory(tag, __LINE__));
-}
-
-// Publish memory range [a, b).
-// 'a' must be non-zero and less than 'b'. The range is split along cache
-// line boundaries and each piece is handed to PublishRangeInOneLine().
-static void PublishRange(TSanThread *thr, uintptr_t a, uintptr_t b, VTS *vts) {
-  CHECK(a);
-  CHECK(a < b);
-  if (kDebugPublish)
-    Printf("PublishRange   : [%p,%p), size=%d, tag=%p\n",
-           a, b, (int)(b - a), CacheLine::ComputeTag(a));
-  uintptr_t line1_tag = 0, line2_tag = 0;
-  // NOTE(review): GetCacheLinesForRange() appears to return a non-zero tag
-  // when the whole range lies in one line, and otherwise 0 with
-  // line1_tag/line2_tag bounding the fully covered lines -- confirm against
-  // its definition.
-  uintptr_t tag = GetCacheLinesForRange(a, b, &line1_tag, &line2_tag);
-  if (tag) {
-    PublishRangeInOneLine(thr, tag, a - tag, b - tag, vts);
-    return;
-  }
-  // Head: from 'a' to the end of its line.
-  uintptr_t a_tag = CacheLine::ComputeTag(a);
-  PublishRangeInOneLine(thr, a, a - a_tag, CacheLine::kLineSize, vts);
-  // Middle: whole lines in [line1_tag, line2_tag).
-  for (uintptr_t tag_i = line1_tag; tag_i < line2_tag;
-       tag_i += CacheLine::kLineSize) {
-    PublishRangeInOneLine(thr, tag_i, 0, CacheLine::kLineSize, vts);
-  }
-  // Tail: the leading part of the line at line2_tag, if 'b' reaches into it.
-  if (b > line2_tag) {
-    PublishRangeInOneLine(thr, line2_tag, 0, b - line2_tag, vts);
-  }
-}
-
-// -------- ThreadSanitizerReport -------------- {{{1
-// Base class of all reports. Holds the fields shared by every report kind;
-// the derived structs add the kind-specific data.
-struct ThreadSanitizerReport {
-  // Types of reports.
-  enum ReportType {
-    DATA_RACE,
-    UNLOCK_FOREIGN,
-    UNLOCK_NONLOCKED,
-    INVALID_LOCK,
-    ATOMICITY_VIOLATION,
-  };
-
-  // Common fields.
-  // None of them is initialized here; the code creating a report fills
-  // them in.
-  ReportType  type;
-  TID         tid;
-  StackTrace *stack_trace;  // Deleted by the destructor.
-
-  // Returns a short name for 'type'. Aborts via CHECK(0) if 'type' is not
-  // one of the enumerators.
-  const char *ReportName() const {
-    switch (type) {
-      case DATA_RACE:        return "Race";
-      case UNLOCK_FOREIGN:   return "UnlockForeign";
-      case UNLOCK_NONLOCKED: return "UnlockNonLocked";
-      case INVALID_LOCK:     return "InvalidLock";
-      case ATOMICITY_VIOLATION: return "AtomicityViolation";
-    }
-    CHECK(0);
-    return NULL;
-  }
-
-  // Virtual so that derived reports can be destroyed through a base pointer.
-  virtual ~ThreadSanitizerReport() {
-    StackTrace::Delete(stack_trace);
-  }
-};
-
-static bool ThreadSanitizerPrintReport(ThreadSanitizerReport *report);
-
-// DATA_RACE.
-// Field meanings below are inferred from the names only -- TODO confirm
-// against the code that fills in and prints the report.
-struct ThreadSanitizerDataRaceReport : public ThreadSanitizerReport {
-  uintptr_t   racey_addr;
-  string      racey_addr_description;
-  // Description of the access that triggered the report (presumably).
-  uintptr_t   last_access_size;
-  TID         last_access_tid;
-  SID         last_access_sid;
-  bool        last_access_is_w;
-  LSID        last_acces_lsid[2];
-
-  ShadowValue new_sval;
-  ShadowValue old_sval;
-
-  bool        is_expected;
-  bool        racey_addr_was_published;
-};
-
-// Report for bad unlock (UNLOCK_FOREIGN, UNLOCK_NONLOCKED).
-struct ThreadSanitizerBadUnlockReport : public ThreadSanitizerReport {
-  LID lid;  // The lock being unlocked.
-};
-
-// Report for invalid lock addresses (INVALID_LOCK).
-struct ThreadSanitizerInvalidLockReport : public ThreadSanitizerReport {
-  uintptr_t lock_addr;  // The address that was passed as a lock.
-};
-
-class AtomicityRegion;
-
-// Report for a suspected atomicity violation (ATOMICITY_VIOLATION).
-struct ThreadSanitizerAtomicityViolationReport : public ThreadSanitizerReport {
-  // The three regions forming the reported pattern; the report stores plain
-  // pointers to them.
-  AtomicityRegion *r1, *r2, *r3;
-};
-
-
-// -------- LockHistory ------------- {{{1
-// For each thread we store a limited amount of history of locks and unlocks.
-// If there is a race report (in hybrid mode) we try to guess a lock
-// which might have been used to pass the ownership of the object between
-// threads.
-//
-// Thread1:                    Thread2:
-// obj->UpdateMe();
-// mu.Lock();
-// flag = true;
-// mu.Unlock(); // (*)
-//                             mu.Lock();  // (**)
-//                             bool f = flag;
-//                             mu.Unlock();
-//                             if (f)
-//                                obj->UpdateMeAgain();
-//
-// For this code a hybrid detector may report a false race.
-// LockHistory will find the lock mu and report it.
-
-// Bounded per-thread log of recent Lock and Unlock events, each stamped with
-// the value of the global g_lock_era counter at the time of the event.
-struct LockHistory {
- public:
-  // LockHistory which will track no more than `size` recent locks
-  // and the same amount of unlocks.
-  LockHistory(size_t size): size_(size) { }
-
-  // Record a Lock event.
-  // Advances the global lock era before recording.
-  void OnLock(LID lid) {
-    g_lock_era++;
-    Push(LockHistoryElement(lid, g_lock_era), &locks_);
-  }
-
-  // Record an Unlock event.
-  // Advances the global lock era before recording.
-  void OnUnlock(LID lid) {
-    g_lock_era++;
-    Push(LockHistoryElement(lid, g_lock_era), &unlocks_);
-  }
-
-  // Find locks such that:
-  // - A Lock happened in `l`.
-  // - An Unlock happened in `u`.
-  // - Lock's era is not smaller than Unlock's era.
-  // - Both eras are greater or equal than min_lock_era.
-  // Matching LIDs are added to *locks. Returns true iff *locks is non-empty
-  // on exit (entries already present before the call count as well).
-  static bool Intersect(const LockHistory &l, const LockHistory &u,
-                        int32_t min_lock_era, set<LID> *locks) {
-    const Queue &lq = l.locks_;
-    const Queue &uq = u.unlocks_;
-    for (size_t i = 0; i < lq.size(); i++) {
-      // The era is stored as uint32_t but compared here as int32_t.
-      int32_t l_era = lq[i].lock_era;
-      if (l_era < min_lock_era) continue;
-      LID lid = lq[i].lid;
-      // We don't want to report pure happens-before locks since
-      // they already create h-b arcs.
-      if (Lock::LIDtoLock(lid)->is_pure_happens_before()) continue;
-      for (size_t j = 0; j < uq.size(); j++) {
-        int32_t u_era = uq[j].lock_era;
-        if (lid != uq[j].lid) continue;
-        // Report("LockHistory::Intersect: L%d %d %d %d\n", lid.raw(), min_lock_era, u_era, l_era);
-        if (u_era < min_lock_era)  continue;
-        // The unlock must not be newer than the lock.
-        if (u_era > l_era) continue;
-        locks->insert(lid);
-      }
-    }
-    return !locks->empty();
-  }
-
-  // Print the recorded Lock / Unlock events (each lock is printed once).
-  void PrintLocks() const { Print(&locks_); }
-  void PrintUnlocks() const { Print(&unlocks_); }
-
- private:
-  // One recorded event: which lock, and the lock era at that moment.
-  struct LockHistoryElement {
-    LID lid;
-    uint32_t lock_era;
-    LockHistoryElement(LID l, uint32_t era)
-        : lid(l),
-        lock_era(era) {
-        }
-  };
-
-  // Oldest event at the front, newest at the back.
-  typedef deque<LockHistoryElement> Queue;
-
-  // Appends 'e' to 'q', dropping the oldest element first if the queue
-  // already holds size_ elements.
-  void Push(LockHistoryElement e, Queue *q) {
-    CHECK(q->size() <= size_);
-    if (q->size() == size_)
-      q->pop_front();
-    q->push_back(e);
-  }
-
-  // Reports the locks found in 'q', oldest first. A lock that occurs several
-  // times is reported only for its first (oldest) occurrence.
-  void Print(const Queue *q) const {
-    set<LID> printed;
-    for (size_t i = 0; i < q->size(); i++) {
-      const LockHistoryElement &e = (*q)[i];
-      if (printed.count(e.lid)) continue;
-      Report("era %d: \n", e.lock_era);
-      Lock::ReportLockWithOrWithoutContext(e.lid, true);
-      printed.insert(e.lid);
-    }
-  }
-
-  Queue locks_;    // Recent Lock events.
-  Queue unlocks_;  // Recent Unlock events.
-  size_t size_;    // Capacity of each of the two queues.
-};
-
-// -------- RecentSegmentsCache ------------- {{{1
-// For each thread we store a limited amount of recent segments with
-// the same VTS and LS as the current segment.
-// When a thread enters a new basic block, we can sometimes reuse a
-// recent segment if it is the same or not used anymore (see Search()).
-//
-// We need to flush the cache when current lockset changes or the current
-// VTS changes or we do ForgetAllState.
-// TODO(timurrrr): probably we can cache segments with different LSes and
-// compare their LS with the current LS.
-// Small per-thread queue of recently used segments, newest first. Every
-// segment in the queue holds one reference taken by this cache.
-struct RecentSegmentsCache {
- public:
-  // 'cache_size' is the maximal number of segments kept in the queue.
-  RecentSegmentsCache(int cache_size) : cache_size_(cache_size) {}
-  ~RecentSegmentsCache() { Clear(); }
-
-  // Drops (and unrefs) every cached segment.
-  void Clear() {
-    ShortenQueue(0);
-  }
-
-  // Adds 'sid' as the most recent segment, taking a reference on it, and
-  // evicts the oldest segments if the queue grew beyond cache_size_.
-  void Push(SID sid) {
-    queue_.push_front(sid);
-    // Tag the reference with the function that really takes it (the label
-    // used to be a copy of the one in ShortenQueue()).
-    Segment::Ref(sid, "RecentSegmentsCache::Push");
-    ShortenQueue(cache_size_);
-  }
-
-  // Forgets all cached segments without touching their reference counts.
-  void ForgetAllState() {
-    queue_.clear();  // Don't unref - the segments are already dead.
-  }
-
-  // Looks for a cached segment that can be reused for the current position.
-  //   curr_stack   - the current call stack of the thread.
-  //   curr_sid     - the current segment of the thread.
-  //   needs_refill - set to true if the returned segment must get a fresh
-  //                  stack trace, to false if its stack trace already
-  //                  matches; left untouched when nothing is found.
-  // Returns the segment to reuse, or a default-constructed SID if none fits.
-  INLINE SID Search(CallStack *curr_stack,
-                    SID curr_sid, /*OUT*/ bool *needs_refill) {
-    // TODO(timurrrr): we can probably move the matched segment to the head
-    // of the queue.
-
-    deque<SID>::iterator it = queue_.begin();
-    for (; it != queue_.end(); ++it) {
-      SID sid = *it;
-      Segment::AssertLive(sid, __LINE__);
-      Segment *seg = Segment::Get(sid);
-
-      if (seg->ref_count() == 1 + (sid == curr_sid)) {
-        // The current segment is not used anywhere else,
-        // so just replace the stack trace in it.
-        // The refcount of an unused segment is equal to
-        // *) 1 if it is stored only in the cache,
-        // *) 2 if it is the current segment of the Thread.
-        *needs_refill = true;
-        return sid;
-      }
-
-      // Check three top entries of the call stack of the recent segment.
-      // If they match the current segment stack, don't create a new segment.
-      // This can probably lead to a little bit wrong stack traces in rare
-      // occasions but we don't really care that much.
-      if (kSizeOfHistoryStackTrace > 0) {
-        size_t n = curr_stack->size();
-        uintptr_t *emb_trace = Segment::embedded_stack_trace(sid);
-        if(*emb_trace &&  // This stack trace was filled
-           curr_stack->size() >= 3 &&
-           emb_trace[0] == (*curr_stack)[n-1] &&
-           emb_trace[1] == (*curr_stack)[n-2] &&
-           emb_trace[2] == (*curr_stack)[n-3]) {
-          *needs_refill = false;
-          return sid;
-        }
-      }
-    }
-
-    return SID();
-  }
-
- private:
-  // Removes segments from the old end of the queue, releasing the cache's
-  // reference on each, until at most 'flush_to_length' remain.
-  void ShortenQueue(size_t flush_to_length) {
-    while (queue_.size() > flush_to_length) {
-      SID sid = queue_.back();
-      Segment::Unref(sid, "RecentSegmentsCache::ShortenQueue");
-      queue_.pop_back();
-    }
-  }
-
-  deque<SID> queue_;   // Newest segment at the front.
-  size_t cache_size_;  // Maximal length of queue_.
-};
-
-// -------- TraceInfo ------------------ {{{1
-vector<TraceInfo*> *TraceInfo::g_all_traces;
-
-// Allocates and registers a TraceInfo with room for 'n_mops' MopInfo
-// entries. The object is appended to g_all_traces (created on first use)
-// and is not freed by this function.
-//   n_mops - number of memory operations in the trace.
-//   pc     - address of the trace; stored only if
-//            ThreadSanitizerWantToCreateSegmentsOnSblockEntry(pc) is true,
-//            otherwise pc_ is set to 0.
-TraceInfo *TraceInfo::NewTraceInfo(size_t n_mops, uintptr_t pc) {
-  ScopedMallocCostCenter cc("TraceInfo::NewTraceInfo");
-  // presumably TraceInfo already embeds one MopInfo, hence "n_mops - 1"
-  // -- confirm against the class definition.
-  size_t mem_size = (sizeof(TraceInfo) + (n_mops - 1) * sizeof(MopInfo));
-  uint8_t *mem = new uint8_t[mem_size];
-  // Fill with a recognizable pattern before constructing the object in place.
-  memset(mem, 0xab, mem_size);
-  TraceInfo *res = new (mem) TraceInfo;
-  res->n_mops_ = n_mops;
-  res->pc_ = ThreadSanitizerWantToCreateSegmentsOnSblockEntry(pc) ? pc : 0;
-  res->counter_ = 0;
-  if (g_all_traces == NULL) {
-    g_all_traces = new vector<TraceInfo*>;
-  }
-  res->literace_storage = NULL;
-  if (G_flags->literace_sampling != 0) {
-    ScopedMallocCostCenter cc("TraceInfo::NewTraceInfo::LiteRaceStorage");
-    size_t index_of_this_trace = g_all_traces->size();
-    // Traces share LiteRace storage in groups of kLiteRaceStorageSize: the
-    // first trace of a group allocates it, the others reuse the pointer of
-    // the trace registered just before them.
-    if ((index_of_this_trace % kLiteRaceStorageSize) == 0) {
-      res->literace_storage = (LiteRaceStorage*)
-          new LiteRaceCounters [kLiteRaceStorageSize * kLiteRaceNumTids];
-      memset(res->literace_storage, 0, sizeof(LiteRaceStorage));
-    } else {
-      CHECK(index_of_this_trace > 0);
-      res->literace_storage = (*g_all_traces)[index_of_this_trace - 1]->literace_storage;
-      CHECK(res->literace_storage);
-    }
-    // Position of this trace inside its group.
-    res->storage_index = index_of_this_trace % kLiteRaceStorageSize;
-  }
-  g_all_traces->push_back(res);
-  return res;
-}
-
-// Prints the hottest traces (at most 20, and only those with at least one
-// per-mille of all hits), ordered by decreasing hit counter. Does nothing
-// unless --trace_profile is set and at least one trace was hit.
-void TraceInfo::PrintTraceProfile() {
-  if (!G_flags->trace_profile || !g_all_traces) return;
-
-  typedef multimap<size_t, TraceInfo*> TracesByCounter;
-  TracesByCounter traces;
-  int64_t total_counter = 0;
-  for (size_t idx = 0; idx != g_all_traces->size(); ++idx) {
-    TraceInfo *registered = (*g_all_traces)[idx];
-    traces.insert(make_pair(registered->counter(), registered));
-    total_counter += registered->counter();
-  }
-  if (total_counter == 0) return;
-
-  Printf("TraceProfile: %ld traces, %lld hits\n",
-         g_all_traces->size(), total_counter);
-
-  // Walk from the largest counter down.
-  int i = 0;
-  TracesByCounter::reverse_iterator it = traces.rbegin();
-  while (it != traces.rend()) {
-    TraceInfo *trace = it->second;
-    int64_t c = it->first;
-    int64_t permile = (c * 1000) / total_counter;
-    CHECK(trace->n_mops() > 0);
-    uintptr_t pc = trace->GetMop(0)->pc();
-    CHECK(pc);
-    if (permile == 0 || i >= 20) break;
-    Printf("TR=%p pc: %p %p c=%lld (%lld/1000) n_mops=%ld %s\n",
-           trace, trace->pc(), pc, c,
-           permile, trace->n_mops(),
-           PcToRtnNameAndFilePos(pc).c_str());
-    ++it;
-    i++;
-  }
-}
-
-// -------- Atomicity --------------- {{{1
-// An attempt to detect atomicity violations (aka high level races).
-// Here we try to find a very restrictive pattern:
-// Thread1                    Thread2
-//   r1: {
-//     mu.Lock();
-//     code_r1();
-//     mu.Unlock();
-//   }
-//   r2: {
-//     mu.Lock();
-//     code_r2();
-//     mu.Unlock();
-//   }
-//                           r3: {
-//                             mu.Lock();
-//                             code_r3();
-//                             mu.Unlock();
-//                           }
-// We have 3 regions of code such that
-// - two of them are in one thread and 3-rd in another thread.
-// - all 3 regions have the same lockset,
-// - the distance between r1 and r2 is small,
-// - there is no h-b arc between r2 and r3,
-// - r1 and r2 have different stack traces,
-//
-// In this situation we report a 'Suspected atomicity violation'.
-//
-// Current status:
-// this code detects atomicity violations on our two motivating examples
-// (--gtest_filter=*Atomicity*  --gtest_also_run_disabled_tests) and does
-// not overwhelm with false reports.
-// However, this functionality is still raw and not tuned for performance.
-
-// TS_ATOMICITY is on in debug mode or if we enabled it at the build time.
-#ifndef TS_ATOMICITY
-# define TS_ATOMICITY DEBUG_MODE
-#endif
-
-
-// Snapshot of one thread's state for one lock era, as filled in by the code
-// that creates the region; it is the unit examined by
-// HandleAtomicityRegion().
-struct AtomicityRegion {
-  int lock_era;             // Value of the lock era counter for the region.
-  TID tid;                  // Thread that executed the region.
-  VTS *vts;                 // Vector time stamp of that thread.
-  StackTrace *stack_trace;  // Where the region was recorded.
-  LSID lsid[2];             // Locksets: [0] reader, [1] writer.
-  BitSet access_set[2];     // Accessed memory: [0] reads, [1] writes.
-  bool used;                // True once the region is part of a report.
-  int n_mops_since_start;   // Thread's memory-operation counter.
-
-  // Dumps the region through Report().
-  void Print() {
-    // n_mops_since_start is an int, so it is printed with %d; a %ld here
-    // would read a long from the variadic arguments.
-    Report("T%d era=%d nmss=%d AtomicityRegion:\n  rd: %s\n  wr: %s\n  %s\n%s",
-           tid.raw(),
-           lock_era,
-           n_mops_since_start,
-           access_set[0].ToString().c_str(),
-           access_set[1].ToString().c_str(),
-           TwoLockSetsToString(lsid[false], lsid[true]).c_str(),
-           stack_trace->ToString().c_str()
-          );
-  }
-};
-
-// Two regions count as having "similar" locksets when their reader locksets
-// (lsid[0]) are equal; the writer locksets are deliberately not compared,
-// so a region that took only reader locks can still match.
-bool SimilarLockSetForAtomicity(AtomicityRegion *r1, AtomicityRegion *r2) {
-  const bool same_reader_lockset = (r1->lsid[0] == r2->lsid[0]);
-  return same_reader_lockset;
-}
-
-static deque<AtomicityRegion *> *g_atomicity_regions;
-static map<StackTrace *, int, StackTrace::Less> *reported_atomicity_stacks_;
-const size_t kMaxAtomicityRegions = 8;
-
-// Registers a freshly finished region and looks for the r1/r2/r3 pattern
-// with the new region playing the role of r3. A report is printed for each
-// r2 that has a matching r1.
-//
-// The global queue holds at most kMaxAtomicityRegions regions, newest at
-// the front; both globals are created lazily on the first call.
-static void HandleAtomicityRegion(AtomicityRegion *atomicity_region) {
-  if (!g_atomicity_regions) {
-    g_atomicity_regions = new deque<AtomicityRegion*>;
-    reported_atomicity_stacks_ = new map<StackTrace *, int, StackTrace::Less>;
-  }
-
-  if (g_atomicity_regions->size() >= kMaxAtomicityRegions) {
-    // Evict the oldest region. A region that was used in a report is only
-    // removed from the queue, not freed (the report keeps pointers to it).
-    AtomicityRegion *to_delete = g_atomicity_regions->back();
-    g_atomicity_regions->pop_back();
-    if (!to_delete->used) {
-      VTS::Unref(to_delete->vts);
-      StackTrace::Delete(to_delete->stack_trace);
-      delete to_delete;
-    }
-  }
-  g_atomicity_regions->push_front(atomicity_region);
-  size_t n = g_atomicity_regions->size();
-
-  // Disabled debug dump of the whole queue.
-  if (0) {
-    for (size_t i = 0; i < n; i++) {
-      AtomicityRegion *r = (*g_atomicity_regions)[i];
-      r->Print();
-    }
-  }
-
-  // r3 is the region that was just added.
-  AtomicityRegion *r3 = (*g_atomicity_regions)[0];
-  for (size_t i = 1; i < n; i++) {
-    // r2: an older region of another thread with the same reader lockset
-    // and no happens-before arc to r3.
-    AtomicityRegion *r2 = (*g_atomicity_regions)[i];
-    if (r2->tid     != r3->tid &&
-        SimilarLockSetForAtomicity(r2, r3) &&
-        !VTS::HappensBeforeCached(r2->vts, r3->vts)) {
-      for (size_t j = i + 1; j < n; j++) {
-        // r1: a still older region of r2's thread.
-        AtomicityRegion *r1 = (*g_atomicity_regions)[j];
-        if (r1->tid != r2->tid) continue;
-        CHECK(r2->lock_era > r1->lock_era);
-        // r1 and r2 must be close in lock eras; older candidates are even
-        // further away, so stop searching.
-        if (r2->lock_era - r1->lock_era > 2) break;
-        if (!SimilarLockSetForAtomicity(r1, r2)) continue;
-        if (StackTrace::Equals(r1->stack_trace, r2->stack_trace)) continue;
-        // Required access pattern: r1 wrote nothing, r2 and r3 both wrote.
-        if (!(r1->access_set[1].empty() &&
-              !r2->access_set[1].empty() &&
-              !r3->access_set[1].empty())) continue;
-        CHECK(r1->n_mops_since_start <= r2->n_mops_since_start);
-        // r1 and r2 must also be close in executed memory operations.
-        if (r2->n_mops_since_start - r1->n_mops_since_start > 5) continue;
-        // Report each r1 stack trace only once.
-        if ((*reported_atomicity_stacks_)[r1->stack_trace] > 0) continue;
-
-        (*reported_atomicity_stacks_)[r1->stack_trace]++;
-        (*reported_atomicity_stacks_)[r2->stack_trace]++;
-        (*reported_atomicity_stacks_)[r3->stack_trace]++;
-        r1->used = r2->used = r3->used = true;
-        ThreadSanitizerAtomicityViolationReport *report =
-            new ThreadSanitizerAtomicityViolationReport;
-        report->type = ThreadSanitizerReport::ATOMICITY_VIOLATION;
-        report->tid = TID(0);
-        // NOTE(review): the report shares r1's stack trace pointer, and
-        // ~ThreadSanitizerReport() deletes stack_trace -- confirm the report
-        // is never destroyed while r1 is alive.
-        report->stack_trace = r1->stack_trace;
-        report->r1 = r1;
-        report->r2 = r2;
-        report->r3 = r3;
-        ThreadSanitizerPrintReport(report);
-        break;
-      }
-    }
-  }
-}
-
-// -------- TSanThread ------------------ {{{1
-struct TSanThread {
- public:
-  ThreadLocalStats stats;
-
-  // Creates the bookkeeping object for thread 'tid' and registers it in
-  // all_threads_.
-  //   tid              - id of the new thread; must be below
-  //                      G_flags->max_n_threads and not yet registered.
-  //   parent_tid       - id of the creating thread (may be invalid).
-  //   vts              - initial vector time stamp, used for the thread's
-  //                      first segment.
-  //   creation_context - stack trace of the creation point; must be non-NULL
-  //                      for any thread other than T0 with a valid parent.
-  //   call_stack       - the thread's call stack object.
-  TSanThread(TID tid, TID parent_tid, VTS *vts, StackTrace *creation_context,
-         CallStack *call_stack)
-    : is_running_(true),
-      tid_(tid),
-      sid_(0),
-      parent_tid_(parent_tid),
-      max_sp_(0),
-      min_sp_(0),
-      stack_size_for_ignore_(0),
-      fun_r_ignore_(0),
-      min_sp_for_ignore_(0),
-      n_mops_since_start_(0),
-      creation_context_(creation_context),
-      announced_(false),
-      rd_lockset_(0),
-      wr_lockset_(0),
-      expensive_bits_(0),
-      vts_at_exit_(NULL),
-      call_stack_(call_stack),
-      lock_history_(128),
-      recent_segments_cache_(G_flags->recent_segments_cache_size),
-      inside_atomic_op_(),
-      // Seed the per-thread PRNG from the thread id and the argument
-      // addresses.
-      rand_state_((unsigned)(tid.raw() + (uintptr_t)vts
-                      + (uintptr_t)creation_context
-                      + (uintptr_t)call_stack)) {
-
-    // There is no previous segment to release yet.
-    NewSegmentWithoutUnrefingOld("TSanThread Creation", vts);
-    ignore_depth_[0] = ignore_depth_[1] = 0;
-
-    HandleRtnCall(0, 0, IGNORE_BELOW_RTN_UNKNOWN);
-    ignore_context_[0] = NULL;
-    ignore_context_[1] = NULL;
-    if (tid != TID(0) && parent_tid.valid()) {
-      CHECK(creation_context_);
-    }
-
-    // Add myself to the array of threads.
-    CHECK(tid.raw() < G_flags->max_n_threads);
-    CHECK(all_threads_[tid.raw()] == NULL);
-    n_threads_ = max(n_threads_, tid.raw() + 1);
-    all_threads_[tid.raw()] = this;
-    dead_sids_.reserve(kMaxNumDeadSids);
-    fresh_sids_.reserve(kMaxNumFreshSids);
-    ComputeExpensiveBits();
-  }
-
-  // Id of this thread / of the thread that created it.
-  TID tid() const { return tid_; }
-  TID parent_tid() const { return parent_tid_; }
-
-  // Bumps the per-thread counter that HandleAccessSet() copies into
-  // AtomicityRegion::n_mops_since_start.
-  void increment_n_mops_since_start() {
-    n_mops_since_start_++;
-  }
-
-  // STACK
-  // Stack bounds as set by SetStack(); both are 0 until then.
-  uintptr_t max_sp() const { return max_sp_; }
-  uintptr_t min_sp() const { return min_sp_; }
-
-  // Next value of the per-thread pseudo-random generator.
-  unsigned random() {
-    return tsan_prng(&rand_state_);
-  }
-
-  // Returns false while inside_atomic_op_ is non-zero.
-  bool ShouldReportRaces() const {
-    return (inside_atomic_op_ == 0);
-  }
-
-  // Records the stack bounds [stack_min, stack_max] of this thread.
-  // The stack must be non-empty and at most 64M. With --ignore_stack the
-  // same range is also remembered for IgnoreMemoryIfInStack(); without it
-  // the "ignore" range must still be unset.
-  void SetStack(uintptr_t stack_min, uintptr_t stack_max) {
-    CHECK(stack_min < stack_max);
-    // Stay sane. Expect stack less than 64M.
-    CHECK(stack_max - stack_min <= 64 * 1024 * 1024);
-    max_sp_ = stack_max;
-    min_sp_ = stack_min;
-    if (!G_flags->ignore_stack) {
-      CHECK(min_sp_for_ignore_ == 0 &&
-            stack_size_for_ignore_ == 0);
-      return;
-    }
-    stack_size_for_ignore_ = max_sp_ - min_sp_;
-    min_sp_for_ignore_ = min_sp_;
-  }
-
-  // True if 'a' lies in [min_sp_, max_sp_] (both ends inclusive).
-  bool MemoryIsInStack(uintptr_t a) {
-    return a >= min_sp_ && a <= max_sp_;
-  }
-
-  // Range test against the "ignore" stack range with a single comparison:
-  // the subtraction is meant to wrap around for addresses below
-  // min_sp_for_ignore_ (assumes both members are unsigned -- TODO confirm).
-  // While the range is unset (both members 0) the result is always false.
-  bool IgnoreMemoryIfInStack(uintptr_t a) {
-    return (a - min_sp_for_ignore_) < stack_size_for_ignore_;
-  }
-
-
-  // Prints a one-time "INFO" line introducing this thread.
-  // Returns false if the thread was announced before (nothing is printed),
-  // true otherwise. With --announce-threads the creation stack is printed
-  // and the parent thread is announced as well.
-  bool Announce() {
-    if (announced_) return false;
-    announced_ = true;
-
-    if (tid_ == TID(0)) {
-      Report("INFO: T0 is program's main thread\n");
-      return true;
-    }
-
-    if (!G_flags->announce_threads) {
-      Report("INFO: T%d has been created by T%d. "
-             "Use --announce-threads to see the creation stack.\n",
-             tid_.raw(), parent_tid_.raw());
-      return true;
-    }
-
-    Report("INFO: T%d has been created by T%d at this point: {{{\n%s}}}\n",
-           tid_.raw(), parent_tid_.raw(),
-           creation_context_->ToString().c_str());
-    // Walk up the creation chain so the reader sees where the parent
-    // came from, too.
-    TSanThread * parent = GetIfExists(parent_tid_);
-    CHECK(parent);
-    parent->Announce();
-    return true;
-  }
-
-  // Human-readable name of the thread: "T<id>", followed by " (<name>)"
-  // when a name was set via set_thread_name().
-  string ThreadName() const {
-    char id_text[100];
-    snprintf(id_text, sizeof(id_text), "T%d", tid().raw());
-    string full_name(id_text);
-    if (!thread_name_.empty()) {
-      full_name.append(" (").append(thread_name_).append(")");
-    }
-    return full_name;
-  }
-
-  bool is_running() const { return is_running_; }
-
-  // Recomputes expensive_bits_, a small bit set:
-  //   bit 0 - reads are currently ignored  (ignore_depth_[0] != 0),
-  //   bit 1 - writes are currently ignored (ignore_depth_[1] != 0),
-  //   bit 2 - tracing, extended statistics or event sampling is enabled.
-  INLINE void ComputeExpensiveBits() {
-    int bits = 0;
-    if (ignore_depth_[0] != 0) {
-      bits |= 1;
-    }
-    if (ignore_depth_[1] != 0) {
-      bits |= 1 << 1;
-    }
-    if (G_flags->trace_level > 0 ||
-        G_flags->show_stats > 1 ||
-        G_flags->sample_events > 0) {
-      bits |= 1 << 2;
-    }
-    expensive_bits_ = bits;
-  }
-
-  // The bit set maintained by ComputeExpensiveBits(), and its two low bits.
-  int expensive_bits() { return expensive_bits_; }
-  int ignore_reads() { return expensive_bits() & 1; }
-  int ignore_writes() { return (expensive_bits() >> 1) & 1; }
-
-  // ignore
-  // Turns ignoring of reads (is_w == false) or writes (is_w == true) on or
-  // off. The setting nests: each "on" increments a depth counter and each
-  // "off" decrements it; the counter must never become negative.
-  INLINE void set_ignore_accesses(bool is_w, bool on) {
-    ignore_depth_[is_w] += on ? 1 : -1;
-    CHECK(ignore_depth_[is_w] >= 0);
-    ComputeExpensiveBits();
-    if (on && G_flags->save_ignore_context) {
-      // Remember where ignoring was last switched on, replacing the
-      // previously saved stack trace.
-      StackTrace::Delete(ignore_context_[is_w]);
-      ignore_context_[is_w] = CreateStackTrace(0, 3);
-    }
-  }
-  // Same as set_ignore_accesses(), applied to both reads and writes.
-  INLINE void set_ignore_all_accesses(bool on) {
-    set_ignore_accesses(false, on);
-    set_ignore_accesses(true, on);
-  }
-
-  // Stack trace saved by the last set_ignore_accesses(is_w, true) call, or
-  // NULL if none was saved.
-  StackTrace *GetLastIgnoreContext(bool is_w) {
-    return ignore_context_[is_w];
-  }
-
-  // Id of the thread's current segment.
-  SID sid() const {
-    return sid_;
-  }
-
-  // The thread's current segment; the segment id must be valid and the
-  // segment alive.
-  Segment *segment() const {
-    CHECK(sid().valid());
-    Segment::AssertLive(sid(), __LINE__);
-    return Segment::Get(sid());
-  }
-
-  // Vector time stamp of the current segment.
-  VTS *vts() const {
-    return segment()->vts();
-  }
-
-  // Stores a copy of 'name'; it is appended to the output of ThreadName().
-  void set_thread_name(const char *name) {
-    thread_name_ = string(name);
-  }
-
-  // Marks the thread as finished. Must be called exactly once per thread:
-  // it saves a clone of the final VTS in vts_at_exit_ (handed out later by
-  // HandleThreadJoinAfter()), flushes the dead/fresh segment id lists and
-  // drops the pointer to the call stack.
-  void HandleThreadEnd() {
-    CHECK(is_running_);
-    is_running_ = false;
-    CHECK(!vts_at_exit_);
-    vts_at_exit_ = vts()->Clone();
-    CHECK(vts_at_exit_);
-    FlushDeadSids();
-    ReleaseFreshSids();
-    call_stack_ = NULL;
-  }
-
-  // Return the TID of the joined child and its vts
-  //   vts_at_exit - OUT: the VTS of the joined thread; never NULL on return.
-  //   joined_tid  - the thread being joined; must exist and must not be T0.
-  // NOTE(review): for a still running thread the result is a fresh clone,
-  // otherwise it is the thread's own vts_at_exit_ pointer -- confirm that
-  // callers handle both ownership cases.
-  TID HandleThreadJoinAfter(VTS **vts_at_exit, TID joined_tid) {
-    CHECK(joined_tid.raw() > 0);
-    CHECK(GetIfExists(joined_tid) != NULL);
-    TSanThread* joined_thread  = TSanThread::Get(joined_tid);
-    // Sometimes the joined thread is not truly dead yet.
-    // In that case we just take the current vts.
-    if (joined_thread->is_running_)
-      *vts_at_exit = joined_thread->vts()->Clone();
-    else
-      *vts_at_exit = joined_thread->vts_at_exit_;
-
-    if (*vts_at_exit == NULL) {
-      Printf("vts_at_exit==NULL; parent=%d, child=%d\n",
-             tid().raw(), joined_tid.raw());
-    }
-    CHECK(*vts_at_exit);
-    // Disabled debug output.
-    if (0)
-    Printf("T%d: vts_at_exit_: %s\n", joined_tid.raw(),
-           (*vts_at_exit)->ToString().c_str());
-    return joined_tid;
-  }
-
-  // One more than the largest thread id registered so far (see the
-  // constructor). The counter is read through an annotation macro that
-  // marks the read as intentionally unprotected.
-  static int NumberOfThreads() {
-    return INTERNAL_ANNOTATE_UNPROTECTED_READ(n_threads_);
-  }
-
-  // Returns the thread object registered for 'tid', or NULL if 'tid' is
-  // outside the range of ids registered so far. The lower bound is checked
-  // as well, so a negative (invalid) id yields NULL instead of an
-  // out-of-bounds read of all_threads_.
-  static TSanThread *GetIfExists(TID tid) {
-    if (tid.raw() >= 0 && tid.raw() < NumberOfThreads())
-      return Get(tid);
-    return NULL;
-  }
-
-  // Returns the entry of all_threads_ for 'tid'. The id is range-checked in
-  // debug builds only; use GetIfExists() when the id may be out of range.
-  static TSanThread *Get(TID tid) {
-    DCHECK(tid.raw() < NumberOfThreads());
-    return all_threads_[tid.raw()];
-  }
-
-  // Packages the accesses collected for the current lock era into an
-  // AtomicityRegion and hands it to HandleAtomicityRegion(). Does nothing
-  // if neither reads nor writes were recorded. A non-empty access set
-  // requires --atomicity to be on and pure happens-before mode to be off.
-  void HandleAccessSet() {
-    BitSet *rd_set = lock_era_access_set(false);
-    BitSet *wr_set = lock_era_access_set(true);
-    if (rd_set->empty() && wr_set->empty()) return;
-    CHECK(G_flags->atomicity && !G_flags->pure_happens_before);
-    AtomicityRegion *atomicity_region = new AtomicityRegion;
-    atomicity_region->lock_era = g_lock_era;
-    atomicity_region->tid = tid();
-    // The region gets its own VTS clone and its own copies of the access
-    // sets.
-    atomicity_region->vts = vts()->Clone();
-    atomicity_region->lsid[0] = lsid(0);
-    atomicity_region->lsid[1] = lsid(1);
-    atomicity_region->access_set[0] = *rd_set;
-    atomicity_region->access_set[1] = *wr_set;
-    atomicity_region->stack_trace = CreateStackTrace();
-    atomicity_region->used = false;
-    atomicity_region->n_mops_since_start = this->n_mops_since_start_;
-    // atomicity_region->Print();
-    // Printf("----------- %s\n", __FUNCTION__);
-    // ReportStackTrace(0, 7);
-    HandleAtomicityRegion(atomicity_region);
-  }
-
-  // Locks
-  // Handles acquisition of the lock at 'lock_addr' by this thread.
-  //   lock_addr - address identifying the lock; a Lock object is created
-  //               for it on first use.
-  //   is_w_lock - true for a writer (exclusive) lock, false for a reader
-  //               lock.
-  // Updates the thread's locksets and the Lock object, then starts a new
-  // segment and clears the per-lock-era access sets.
-  void HandleLock(uintptr_t lock_addr, bool is_w_lock) {
-    Lock *lock = Lock::LookupOrCreate(lock_addr);
-
-    if (debug_lock) {
-      Printf("T%d lid=%d %sLock   %p; %s\n",
-           tid_.raw(), lock->lid().raw(),
-           is_w_lock ? "Wr" : "Rd",
-           lock_addr,
-           LockSet::ToString(lsid(is_w_lock)).c_str());
-
-      ReportStackTrace(0, 7);
-    }
-
-    // NOTE: we assume that all locks can be acquired recursively.
-    // No warning about recursive locking will be issued.
-    if (is_w_lock) {
-      // Recursive locks are properly handled because LockSet is in fact a
-      // multiset.
-      // A writer lock is added to both the writer and the reader lockset.
-      wr_lockset_ = LockSet::Add(wr_lockset_, lock);
-      rd_lockset_ = LockSet::Add(rd_lockset_, lock);
-      lock->WrLock(tid_, CreateStackTrace());
-    } else {
-      // A reader lock taken while the lock is recorded as writer-held:
-      // print the current stack trace.
-      if (lock->wr_held()) {
-        ReportStackTrace();
-      }
-      rd_lockset_ = LockSet::Add(rd_lockset_, lock);
-      lock->RdLock(CreateStackTrace());
-    }
-
-    // For pure happens-before locks an acquisition is additionally modelled
-    // as a wait on the lock's signal address.
-    if (lock->is_pure_happens_before()) {
-      if (is_w_lock) {
-        HandleWait(lock->wr_signal_addr());
-      } else {
-        HandleWait(lock->rd_signal_addr());
-      }
-    }
-
-    if (G_flags->suggest_happens_before_arcs) {
-      lock_history_.OnLock(lock->lid());
-    }
-    NewSegmentForLockingEvent();
-    lock_era_access_set_[0].Clear();
-    lock_era_access_set_[1].Clear();
-  }
-
-  // Processes the release of the lock at |lock_addr| by this thread.
-  // Whether this is a write or a read unlock is derived from the lock's
-  // current state (wr_held()).
-  // Reports:
-  //   INVALID_LOCK     - |lock_addr| is not a known lock (returns early);
-  //   UNLOCK_NONLOCKED - the lock is neither write- nor read-held
-  //                      (returns early);
-  //   UNLOCK_FOREIGN   - the lock could not be removed from this thread's
-  //                      lockset(s); processing continues.
-  // On the early-return paths no new segment is started.
-  void HandleUnlock(uintptr_t lock_addr) {
-    HandleAccessSet();
-
-    Lock *lock = Lock::Lookup(lock_addr);
-    // If the lock is not found, report an error.
-    if (lock == NULL) {
-      ThreadSanitizerInvalidLockReport *report =
-          new ThreadSanitizerInvalidLockReport;
-      report->type = ThreadSanitizerReport::INVALID_LOCK;
-      report->tid = tid();
-      report->lock_addr = lock_addr;
-      report->stack_trace = CreateStackTrace();
-      ThreadSanitizerPrintReport(report);
-      return;
-    }
-    bool is_w_lock = lock->wr_held();
-
-    if (debug_lock) {
-      // %p expects a pointer argument, so convert the integer address
-      // explicitly instead of passing a raw uintptr_t through the varargs.
-      Printf("T%d lid=%d %sUnlock %p; %s\n",
-             tid_.raw(), lock->lid().raw(),
-             is_w_lock ? "Wr" : "Rd",
-             reinterpret_cast<void*>(lock_addr),
-             LockSet::ToString(lsid(is_w_lock)).c_str());
-      ReportStackTrace(0, 7);
-    }
-
-    if (lock->is_pure_happens_before()) {
-      // reader unlock signals only to writer lock,
-      // writer unlock signals to both.
-      if (is_w_lock) {
-        HandleSignal(lock->rd_signal_addr());
-      }
-      HandleSignal(lock->wr_signal_addr());
-    }
-
-    // Unlocking a lock nobody holds.
-    if (!lock->wr_held() && !lock->rd_held()) {
-      ThreadSanitizerBadUnlockReport *report =
-          new ThreadSanitizerBadUnlockReport;
-      report->type = ThreadSanitizerReport::UNLOCK_NONLOCKED;
-      report->tid = tid();
-      report->lid = lock->lid();
-      report->stack_trace = CreateStackTrace();
-      ThreadSanitizerPrintReport(report);
-      return;
-    }
-
-    // Mirror HandleLock(): a write lock lives in both locksets, a read lock
-    // only in the reader lockset.
-    bool removed = false;
-    if (is_w_lock) {
-      lock->WrUnlock();
-      removed =  LockSet::Remove(wr_lockset_, lock, &wr_lockset_)
-              && LockSet::Remove(rd_lockset_, lock, &rd_lockset_);
-    } else {
-      lock->RdUnlock();
-      removed = LockSet::Remove(rd_lockset_, lock, &rd_lockset_);
-    }
-
-    // The lock was held, but not according to this thread's locksets.
-    if (!removed) {
-      ThreadSanitizerBadUnlockReport *report =
-          new ThreadSanitizerBadUnlockReport;
-      report->type = ThreadSanitizerReport::UNLOCK_FOREIGN;
-      report->tid = tid();
-      report->lid = lock->lid();
-      report->stack_trace = CreateStackTrace();
-      ThreadSanitizerPrintReport(report);
-    }
-
-    if (G_flags->suggest_happens_before_arcs) {
-      lock_history_.OnUnlock(lock->lid());
-    }
-
-    // The lockset changed, so continue in a new segment.
-    NewSegmentForLockingEvent();
-    lock_era_access_set_[0].Clear();
-    lock_era_access_set_[1].Clear();
-  }
-
-  // Handles memory access with race reports suppressed (declaration only; the body is not in the class).
-  void HandleAtomicMop(uintptr_t a,
-                       uintptr_t pc,
-                       tsan_atomic_op op,
-                       tsan_memory_order mo,
-                       size_t size);
-
-  // Drops the signaller state recorded for |cv|, if any: the stored VTS is
-  // unref'ed and the entry is removed from the signaller map.
-  // Does nothing when |cv| has no entry.
-  void HandleForgetSignaller(uintptr_t cv) {
-    SignallerMap::iterator it = signaller_map_->find(cv);
-    if (it != signaller_map_->end()) {
-      if (debug_happens_before) {
-        // %p expects a pointer argument, so convert the integer address
-        // explicitly instead of passing a raw uintptr_t through the varargs.
-        Printf("T%d: ForgetSignaller: %p:\n    %s\n", tid_.raw(),
-            reinterpret_cast<void*>(cv),
-            (it->second.vts)->ToString().c_str());
-        if (G_flags->debug_level >= 1) {
-          ReportStackTrace();
-        }
-      }
-      // Release the reference held by the map before erasing the entry.
-      VTS::Unref(it->second.vts);
-      signaller_map_->erase(it);
-    }
-  }
-
-  // Returns the writer lockset id when |is_w| is true and the reader
-  // lockset id otherwise.
-  LSID lsid(bool is_w) {
-    if (is_w) {
-      return wr_lockset_;
-    }
-    return rd_lockset_;
-  }
-
-  // Read-only access to this thread's lock history.
-  const LockHistory &lock_history() {
-    return lock_history_;
-  }
-
-  // SIGNAL/WAIT events.
-  // Handles a WAIT on |cv|: if a signaller VTS is recorded for |cv|, joins
-  // it into this thread's clock through NewSegmentForWait(). Without a
-  // recorded signaller the thread's state is left unchanged.
-  void HandleWait(uintptr_t cv) {
-
-    SignallerMap::iterator it = signaller_map_->find(cv);
-    if (it != signaller_map_->end()) {
-      const VTS *signaller_vts = it->second.vts;
-      NewSegmentForWait(signaller_vts);
-    }
-
-    if (debug_happens_before) {
-      // %p expects a pointer argument, so convert the integer address
-      // explicitly instead of passing a raw uintptr_t through the varargs.
-      Printf("T%d: Wait: %p:\n    %s %s\n", tid_.raw(),
-             reinterpret_cast<void*>(cv),
-             vts()->ToString().c_str(),
-             Segment::ToString(sid()).c_str());
-      if (G_flags->debug_level >= 1) {
-        ReportStackTrace();
-      }
-    }
-  }
-
-  // Handles a SIGNAL on |cv|: stores this thread's current clock in the
-  // signaller entry for |cv| (creating the entry if needed, or joining with
-  // the clock already stored there), then ticks this thread's clock via
-  // NewSegmentForSignal().
-  void HandleSignal(uintptr_t cv) {
-    Signaller *signaller = &(*signaller_map_)[cv];
-    if (!signaller->vts) {
-      signaller->vts = vts()->Clone();
-    } else {
-      // Replace the stored VTS with the join and drop the old reference.
-      VTS *new_vts = VTS::Join(signaller->vts, vts());
-      VTS::Unref(signaller->vts);
-      signaller->vts = new_vts;
-    }
-    NewSegmentForSignal();
-    if (debug_happens_before) {
-      // %p expects a pointer argument, so convert the integer address
-      // explicitly instead of passing a raw uintptr_t through the varargs.
-      Printf("T%d: Signal: %p:\n    %s %s\n    %s\n", tid_.raw(),
-             reinterpret_cast<void*>(cv),
-             vts()->ToString().c_str(), Segment::ToString(sid()).c_str(),
-             (signaller->vts)->ToString().c_str());
-      if (G_flags->debug_level >= 1) {
-        ReportStackTrace();
-      }
-    }
-  }
-
-  // Creates a segment with clock |new_vts| and the current locksets, makes
-  // it this thread's current segment and takes a reference on it.
-  // The previously current segment is not unref'ed here.
-  // |call_site| is only used by the disabled debug print.
-  void INLINE NewSegmentWithoutUnrefingOld(const char *call_site,
-                                           VTS *new_vts) {
-    DCHECK(new_vts);
-    SID created_sid = Segment::AddNewSegment(tid(), new_vts,
-                                             rd_lockset_, wr_lockset_);
-    SID previous_sid = sid();
-    if (previous_sid.raw() != 0) {
-      if (new_vts != vts()) {
-        // The VTS changed and will not repeat, so the cached segments
-        // cannot be reused.
-        recent_segments_cache_.Clear();
-      }
-    }
-    sid_ = created_sid;
-    Segment::Ref(created_sid, "TSanThread::NewSegmentWithoutUnrefingOld");
-
-    if (kSizeOfHistoryStackTrace > 0) {
-      FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
-    }
-    // Debug print, compiled in but permanently disabled.
-    if (0) {
-      Printf("2: %s T%d/S%d old_sid=%d NewSegment: %s\n", call_site,
-             tid().raw(), sid().raw(), previous_sid.raw(),
-             vts()->ToString().c_str());
-    }
-  }
-
-  // Switches this thread to a new segment with clock |new_vts| and drops
-  // the reference on the segment that was current before the switch.
-  void INLINE NewSegment(const char *call_site, VTS *new_vts) {
-    // Remember the outgoing segment before sid_ is overwritten.
-    const SID outgoing_sid = sid();
-    NewSegmentWithoutUnrefingOld(call_site, new_vts);
-    Segment::Unref(outgoing_sid, "TSanThread::NewSegment");
-  }
-
-  // Starts a new segment after a lock/unlock event, keeping the clock.
-  void NewSegmentForLockingEvent() {
-    // Segments created under a different lockset must not be reused, so the
-    // recent-segments cache is emptied first.
-    recent_segments_cache_.Clear();
-    VTS *same_clock = vts()->Clone();
-    NewSegment(__FUNCTION__, same_clock);
-  }
-
-  // Starts a new segment for a malloc event, keeping the clock.
-  void NewSegmentForMallocEvent() {
-    // Empty the recent-segments cache so that no earlier segment is reused.
-    recent_segments_cache_.Clear();
-    VTS *same_clock = vts()->Clone();
-    NewSegment(__FUNCTION__, same_clock);
-  }
-
-
-  // Overwrites the innermost call-stack entry with |pc|.
-  // A zero |pc| is ignored.
-  void SetTopPc(uintptr_t pc) {
-    if (!pc) {
-      return;
-    }
-    DCHECK(!call_stack_->empty());
-    call_stack_->back() = pc;
-  }
-
-  // Slow path of HandleSblockEnter(); asserts that the TIL is held.
-  // Flushes all state if segment IDs are running out, creates a brand new
-  // segment with the current clock, caches it and refills the thread-local
-  // pool of fresh SIDs.
-  void NOINLINE HandleSblockEnterSlowLocked() {
-    AssertTILHeld();
-    FlushStateIfOutOfSegments(this);
-    this->stats.history_creates_new_segment++;
-    NewSegment("HandleSblockEnter", vts()->Clone());
-    recent_segments_cache_.Push(sid());
-    // Fill the thread-local SID cache.
-    GetSomeFreshSids();
-  }
-
-  // Handles an SBLOCK_ENTER event at |pc| by selecting the segment the
-  // thread continues in. Tried in order:
-  //   1. a matching segment from the recent-segments cache;
-  //   2. a pre-allocated SID from the thread-local fresh pool;
-  //   3. the slow path, only if |allow_slow_path| is true.
-  // Returns false only when 1 and 2 fail and |allow_slow_path| is false.
-  // A zero |pc| is a no-op that returns true.
-  INLINE bool HandleSblockEnter(uintptr_t pc, bool allow_slow_path) {
-    DCHECK(G_flags->keep_history);
-    if (pc == 0) return true;
-
-    this->stats.events[SBLOCK_ENTER]++;
-    SetTopPc(pc);
-
-    bool must_refill = false;
-    SID cached_sid = recent_segments_cache_.Search(call_stack_, sid(),
-                                                   /*OUT*/&must_refill);
-    DCHECK(kSizeOfHistoryStackTrace > 0);
-
-    if (cached_sid.valid()) {
-      // This part is 100% thread-local, no need for locking.
-      if (sid_ != cached_sid) {
-        Segment::Ref(cached_sid, "TSanThread::HandleSblockEnter");
-        this->AddDeadSid(sid_, "TSanThread::HandleSblockEnter");
-        sid_ = cached_sid;
-      }
-      if (!must_refill) {
-        this->stats.history_uses_same_segment++;
-      } else {
-        this->stats.history_reuses_segment++;
-        FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
-      }
-      return true;
-    }
-
-    if (!fresh_sids_.empty()) {
-      // We have a fresh ready-to-use segment in thread local cache.
-      SID taken_sid = fresh_sids_.back();
-      fresh_sids_.pop_back();
-      Segment::SetupFreshSid(taken_sid, tid(), vts()->Clone(),
-                             rd_lockset_, wr_lockset_);
-      this->AddDeadSid(sid_, "TSanThread::HandleSblockEnter-1");
-      Segment::Ref(taken_sid, "TSanThread::HandleSblockEnter-1");
-      sid_ = taken_sid;
-      recent_segments_cache_.Push(sid());
-      FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
-      this->stats.history_uses_preallocated_segment++;
-      return true;
-    }
-
-    // No fresh SIDs available, have to grab a lock and get few.
-    if (!allow_slow_path) return false;
-    AssertTILHeld();
-    HandleSblockEnterSlowLocked();
-    return true;
-  }
-
-  // Makes |signaller_vts| happen-before this thread: unless that is already
-  // the case, the thread moves to a new segment whose clock is the join of
-  // its current clock and |signaller_vts|.
-  void NewSegmentForWait(const VTS *signaller_vts) {
-    const VTS *my_vts = vts();
-    // Debug print, compiled in but permanently disabled.
-    if (0) {
-      Printf("T%d NewSegmentForWait: \n  %s\n  %s\n", tid().raw(),
-             my_vts->ToString().c_str(),
-             signaller_vts->ToString().c_str());
-    }
-    // Skip the arc when it would be redundant.
-    const bool already_ordered =
-        VTS::HappensBeforeCached(signaller_vts, my_vts);
-    if (!already_ordered) {
-      VTS *joined = VTS::Join(my_vts, signaller_vts);
-      NewSegment("NewSegmentForWait", joined);
-    }
-    DCHECK(VTS::HappensBeforeCached(signaller_vts, vts()));
-  }
-
-  // Moves the thread to a new segment whose clock is a copy of the current
-  // one, ticked for this thread's tid.
-  void NewSegmentForSignal() {
-    VTS *before_tick = vts();
-    VTS *after_tick = VTS::CopyAndTick(before_tick, tid());
-    NewSegment("NewSegmentForSignal", after_tick);
-  }
-
-  // When creating a child thread, we need to know
-  // 1. where the thread was created (ctx)
-  // 2. What was the vector clock of the parent thread (vts).
-
-  struct ThreadCreateInfo {
-    StackTrace *ctx;  // stack trace taken where the child thread was created
-    VTS        *vts;  // clone of the parent's vector clock at creation time
-  };
-
-  // Called when a thread is about to be created. On the first call (while
-  // g_so_far_only_one_thread is still set) clears that flag and turns off
-  // "ignore all accesses" for thread 0. Later calls do nothing.
-  static void StopIgnoringAccessesInT0BecauseNewThreadStarted() {
-    AssertTILHeld();
-    if (!g_so_far_only_one_thread) {
-      return;
-    }
-    g_so_far_only_one_thread = false;
-    Get(TID(0))->set_ignore_all_accesses(false);
-  }
-
-  // This event comes before the child is created (e.g. just
-  // as we entered pthread_create).
-  // Records where (stack trace at |pc|) and when (clone of the current
-  // clock) this thread is creating a child, then ticks this thread's clock.
-  // |parent_tid| must be this thread's tid.
-  void HandleThreadCreateBefore(TID parent_tid, uintptr_t pc) {
-    CHECK(parent_tid == tid());
-    StopIgnoringAccessesInT0BecauseNewThreadStarted();
-
-    // The child's tid is not known yet, so the info is parked under TID(0).
-    ThreadCreateInfo pending;
-    pending.ctx = CreateStackTrace(pc);
-    pending.vts = vts()->Clone();
-    CHECK(pending.ctx && pending.vts);
-    child_tid_to_create_info_[TID(0)] = pending;
-
-    // Tick vts.
-    this->NewSegmentForSignal();
-
-    if (debug_thread) {
-      Printf("T%d: THR_CREATE_BEFORE\n", parent_tid.raw());
-    }
-  }
-
-  // This event comes when we are exiting the thread creation routine.
-  // It may appear before *or* after THR_START event, at least with PIN.
-  // Moves the creation info parked under TID(0) to |child_tid|, unless it
-  // has already been consumed. |parent_tid| must be this thread's tid.
-  void HandleThreadCreateAfter(TID parent_tid, TID child_tid) {
-    CHECK(parent_tid == tid());
-    // Place the info under child_tid if we did not use it yet.
-    map<TID, ThreadCreateInfo>::iterator parked =
-        child_tid_to_create_info_.find(TID(0));
-    if (parked != child_tid_to_create_info_.end()) {
-      child_tid_to_create_info_[child_tid] = parked->second;
-      child_tid_to_create_info_.erase(TID(0));
-    }
-
-    if (debug_thread) {
-      Printf("T%d: THR_CREATE_AFTER %d\n", parent_tid.raw(), child_tid.raw());
-    }
-  }
-
-  // Called on the parent thread object when the child |child_tid| starts.
-  // Outputs the child's creation stack trace in |*ctx| and its initial
-  // clock in |*vts| (join of a singleton for |child_tid| with the parent's
-  // clock recorded at creation). The creation info is taken, in order of
-  // preference, from the entry for |child_tid|, from the entry parked under
-  // TID(0), or is computed on the spot when neither exists.
-  void HandleChildThreadStart(TID child_tid, VTS **vts, StackTrace **ctx) {
-    typedef map<TID, ThreadCreateInfo>::iterator InfoIter;
-    ThreadCreateInfo info;
-    InfoIter found = child_tid_to_create_info_.find(child_tid);
-    if (found != child_tid_to_create_info_.end()) {
-      // We already seen THR_CREATE_AFTER, so the info is under child_tid.
-      info = found->second;
-      child_tid_to_create_info_.erase(found);
-      CHECK(info.ctx && info.vts);
-    } else {
-      found = child_tid_to_create_info_.find(TID(0));
-      if (found != child_tid_to_create_info_.end()) {
-        // We have not seen THR_CREATE_AFTER, but already seen
-        // THR_CREATE_BEFORE.
-        info = found->second;
-        child_tid_to_create_info_.erase(found);
-        CHECK(info.ctx && info.vts);
-      } else {
-        // We have not seen THR_CREATE_BEFORE/THR_CREATE_AFTER.
-        // If the tool is single-threaded (valgrind) these events are
-        // redundant.
-        info.ctx = this->CreateStackTrace();
-        info.vts = this->vts()->Clone();
-        this->NewSegmentForSignal();
-      }
-    }
-
-    *ctx = info.ctx;
-    // The temporaries are released once the joined clock exists.
-    VTS *child_singleton = VTS::CreateSingleton(child_tid);
-    *vts = VTS::Join(child_singleton, info.vts);
-    VTS::Unref(child_singleton);
-    VTS::Unref(info.vts);
-
-    if (debug_thread) {
-      Printf("T%d: THR_START parent: T%d : %s %s\n", child_tid.raw(),
-             this->tid().raw(),
-             this->vts()->ToString().c_str(),
-             (*vts)->ToString().c_str());
-      if (G_flags->announce_threads) {
-        Printf("%s\n", (*ctx)->ToString().c_str());
-      }
-    }
-
-    // Parent should have ticked its VTS so there should be no h-b.
-    DCHECK(!VTS::HappensBefore(this->vts(), *vts));
-  }
-
-  // Support for Cyclic Barrier, e.g. pthread_barrier_t.
-  // We need to create (barrier_count-1)^2 h-b arcs between
-  // threads blocking on a barrier. We should not create any h-b arcs
-  // for two calls to barrier_wait if the barrier was reset between them.
-  struct CyclicBarrierInfo {
-    // The thread count handed to HandleBarrierInit().
-    uint32_t barrier_count;
-    // Waits left in the current round; refilled from barrier_count at 0.
-    int32_t calls_before_reset;
-    // Calls seen by the 'wait-before' / 'wait-after' handlers (give the epoch).
-    int32_t n_wait_before, n_wait_after;
-  };
-  // The following situation is possible:
-  // - N threads blocked on a barrier.
-  // - All N threads reached the barrier and we started getting 'wait-after'
-  //   events, but did not yet get all of them.
-  // - N threads blocked on the barrier again and we started getting
-  //   'wait-before' events from the next barrier epoch.
-  // - We continue getting 'wait-after' events from the previous epoch.
-  //
-  // We don't want to create h-b arcs between barrier events of different
-  // epochs, so we use 'barrier + (epoch % 4)' as an object on which we
-  // signal and wait (it is unlikely that more than 4 epochs are live at once).
-  enum { kNumberOfPossibleBarrierEpochsLiveAtOnce = 4 };  // modulus applied to the barrier epoch
-  // Maps the barrier pointer to CyclicBarrierInfo.
-  typedef unordered_map<uintptr_t, CyclicBarrierInfo> CyclicBarrierMap;
-
-  // Returns the record for |barrier|. The map is allocated on first use and
-  // the lookup inserts a record when |barrier| has none yet.
-  CyclicBarrierInfo &GetCyclicBarrierInfo(uintptr_t barrier) {
-    if (!cyclic_barrier_map_) {
-      cyclic_barrier_map_ = new CyclicBarrierMap;
-    }
-    CyclicBarrierMap &barriers = *cyclic_barrier_map_;
-    return barriers[barrier];
-  }
-
-  // (Re)initializes the record of |barrier| for |n| participating threads.
-  // All counters start from zero; |n| must be positive.
-  void HandleBarrierInit(uintptr_t barrier, uint32_t n) {
-    CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
-    CHECK(n > 0);
-    info.barrier_count = n;
-    info.calls_before_reset = 0;
-    info.n_wait_before = 0;
-    info.n_wait_after = 0;
-  }
-
-  // Handles the event sent before a thread blocks on |barrier|.
-  // Computes the barrier epoch from the number of 'wait-before' calls seen
-  // so far, drops the signaller VTS of that epoch when this is the first
-  // wait after a reset, and then signals on 'barrier + epoch'.
-  void HandleBarrierWaitBefore(uintptr_t barrier) {
-    CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
-
-    CHECK(info.calls_before_reset >= 0);
-    // A barrier that never went through HandleBarrierInit() has no valid
-    // barrier_count; fail with a clear check instead of dividing by zero.
-    CHECK(info.barrier_count > 0);
-    int32_t epoch = info.n_wait_before / info.barrier_count;
-    epoch %= kNumberOfPossibleBarrierEpochsLiveAtOnce;
-    info.n_wait_before++;
-    if (info.calls_before_reset == 0) {
-      // We are blocking the first time after reset. Clear the VTS.
-      info.calls_before_reset = info.barrier_count;
-      Signaller &signaller = (*signaller_map_)[barrier + epoch];
-      VTS::Unref(signaller.vts);
-      signaller.vts = NULL;
-      if (debug_happens_before) {
-        // %p expects a pointer argument; convert the integer address.
-        Printf("T%d barrier %p (epoch %d) reset\n", tid().raw(),
-               reinterpret_cast<void*>(barrier), epoch);
-      }
-    }
-    info.calls_before_reset--;
-    // Signal to all threads that blocked on this barrier.
-    if (debug_happens_before) {
-      // %p expects a pointer argument; convert the integer address.
-      Printf("T%d barrier %p (epoch %d) wait before\n", tid().raw(),
-             reinterpret_cast<void*>(barrier), epoch);
-    }
-    HandleSignal(barrier + epoch);
-  }
-
-  // Handles the event sent after a thread was released from |barrier|.
-  // Computes the barrier epoch from the number of 'wait-after' calls seen so
-  // far and waits on 'barrier + epoch', the object signalled by
-  // HandleBarrierWaitBefore().
-  void HandleBarrierWaitAfter(uintptr_t barrier) {
-    CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
-    // A barrier that never went through HandleBarrierInit() has no valid
-    // barrier_count; fail with a clear check instead of dividing by zero.
-    CHECK(info.barrier_count > 0);
-    int32_t epoch = info.n_wait_after / info.barrier_count;
-    epoch %= kNumberOfPossibleBarrierEpochsLiveAtOnce;
-    info.n_wait_after++;
-    if (debug_happens_before) {
-      // %p expects a pointer argument; convert the integer address.
-      Printf("T%d barrier %p (epoch %d) wait after\n", tid().raw(),
-             reinterpret_cast<void*>(barrier), epoch);
-    }
-    HandleWait(barrier + epoch);
-  }
-
-  // Call stack  -------------
-  // Removes the innermost call-stack entry; the stack must not be empty.
-  void PopCallStack() {
-    const bool has_frames = !call_stack_->empty();
-    CHECK(has_frames);
-    call_stack_->pop_back();
-  }
-
-  // Handles a routine call: records |call_pc| as the pc of the calling
-  // frame (when non-zero and a frame exists), pushes |target_pc| as the new
-  // innermost frame and maintains the "ignore accesses below this routine"
-  // nesting counter. |ignore_below| may carry a precomputed yes/no answer;
-  // with IGNORE_BELOW_RTN_UNKNOWN the answer is looked up (and cached).
-  void HandleRtnCall(uintptr_t call_pc, uintptr_t target_pc,
-                     IGNORE_BELOW_RTN ignore_below) {
-    this->stats.events[RTN_CALL]++;
-    if (call_pc && !call_stack_->empty()) {
-      call_stack_->back() = call_pc;
-    }
-    call_stack_->push_back(target_pc);
-
-    bool ignore = false;
-    if (ignore_below != IGNORE_BELOW_RTN_UNKNOWN) {
-      DCHECK(ignore_below == IGNORE_BELOW_RTN_YES ||
-             ignore_below == IGNORE_BELOW_RTN_NO);
-      ignore = (ignore_below == IGNORE_BELOW_RTN_YES);
-    } else if (ignore_below_cache_.Lookup(target_pc, &ignore)) {
-      // Just in case, check the result of caching.
-      DCHECK(ignore ==
-             ThreadSanitizerIgnoreAccessesBelowFunction(target_pc));
-    } else {
-      // Cache miss: compute the answer and remember it.
-      ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target_pc);
-      ignore_below_cache_.Insert(target_pc, ignore);
-      G_stats->ignore_below_cache_miss++;
-    }
-
-    if (fun_r_ignore_ != 0) {
-      // Already below an ignored routine: just track the nesting depth.
-      fun_r_ignore_++;
-    } else if (ignore) {
-      fun_r_ignore_ = 1;
-      set_ignore_all_accesses(true);
-    }
-  }
-
-  // Handles a routine exit: pops the innermost frame and, when leaving the
-  // outermost ignored routine, re-enables access handling.
-  // With an empty call stack only the event counter is updated.
-  void HandleRtnExit() {
-    this->stats.events[RTN_EXIT]++;
-    if (call_stack_->empty()) {
-      return;
-    }
-    call_stack_->pop_back();
-    if (fun_r_ignore_ == 0) {
-      return;
-    }
-    fun_r_ignore_--;
-    if (fun_r_ignore_ == 0) {
-      set_ignore_all_accesses(false);
-    }
-  }
-
-  // Returns the pc stored |offset_from_top| frames below the innermost
-  // frame, or 0 when the call stack is not that deep.
-  uintptr_t GetCallstackEntry(size_t offset_from_top) {
-    const size_t depth = call_stack_->size();
-    if (offset_from_top < depth) {
-      return (*call_stack_)[depth - offset_from_top - 1];
-    }
-    return 0;
-  }
-
-  // Returns the routine name for the frame |offset_from_top| below the
-  // innermost one, or an empty string when the stack is not that deep.
-  string CallStackRtnName(size_t offset_from_top = 0) {
-    const size_t depth = call_stack_->size();
-    if (offset_from_top >= depth) {
-      return "";
-    }
-    const uintptr_t frame_pc = (*call_stack_)[depth - offset_from_top - 1];
-    return PcToRtnName(frame_pc, false);
-  }
-
-  // Returns the routine names of the |len| innermost frames, innermost
-  // first, separated by single spaces. Frames beyond the stack depth
-  // contribute empty names.
-  string CallStackToStringRtnOnly(int len) {
-    string joined;
-    int frame = 0;
-    while (frame < len) {
-      if (frame != 0) {
-        joined += " ";
-      }
-      joined += CallStackRtnName(frame);
-      frame++;
-    }
-    return joined;
-  }
-
-  // Returns the innermost pc of the call stack, or 0 if the stack is empty.
-  uintptr_t CallStackTopPc() {
-    if (!call_stack_->empty()) {
-      return call_stack_->back();
-    }
-    return 0;
-  }
-
-  // Copies up to kSizeOfHistoryStackTrace innermost pcs of the call stack
-  // into |emb_trace|, innermost first. When fewer pcs than that are copied,
-  // the entry after the last one is set to 0.
-  INLINE void FillEmbeddedStackTrace(uintptr_t *emb_trace) {
-    const size_t depth = call_stack_->size();
-    const size_t capacity = (size_t)kSizeOfHistoryStackTrace;
-    const size_t n_copied = min(depth, capacity);
-    uintptr_t *pcs = call_stack_->pcs();
-    for (size_t i = 0; i < n_copied; i++) {
-      // Walk from the innermost frame outwards.
-      emb_trace[i] = pcs[depth - 1 - i];
-    }
-    if (n_copied < capacity) {
-      emb_trace[n_copied] = 0;
-    }
-  }
-
-  // Stores the |size| innermost pcs of the call stack into |trace|,
-  // innermost first. No bounds check is done here against the stack depth.
-  INLINE void FillStackTrace(StackTrace *trace, size_t size) {
-    const size_t depth = call_stack_->size();
-    uintptr_t *pcs = call_stack_->pcs();
-    for (size_t i = 0; i < size; i++) {
-      trace->Set(i, pcs[depth - 1 - i]);
-    }
-  }
-
-  // Creates a StackTrace holding the innermost frames of the call stack.
-  //   pc       - if non-zero and the stack is not empty, first stored as
-  //              the innermost pc;
-  //   max_len  - maximum number of frames; values <= 0 select
-  //              G_flags->num_callers;
-  //   capacity - forwarded to StackTrace::CreateNewEmptyStackTrace().
-  INLINE StackTrace *CreateStackTrace(uintptr_t pc = 0,
-                                      int max_len = -1,
-                                      int capacity = 0) {
-    if (pc && !call_stack_->empty()) {
-      call_stack_->back() = pc;
-    }
-    if (max_len <= 0) {
-      max_len = G_flags->num_callers;
-    }
-    const int depth = call_stack_->size();
-    const int n_frames = (depth > max_len) ? max_len : depth;
-    StackTrace *trace =
-        StackTrace::CreateNewEmptyStackTrace(n_frames, capacity);
-    FillStackTrace(trace, n_frames);
-    return trace;
-  }
-
-  // Prints the current stack trace through Report(). The arguments have
-  // the same meaning as in CreateStackTrace(); the temporary trace is
-  // deleted before returning.
-  void ReportStackTrace(uintptr_t pc = 0, int max_len = -1) {
-    StackTrace *temp_trace = CreateStackTrace(pc, max_len);
-    Report("%s", temp_trace->ToString().c_str());
-    StackTrace::Delete(temp_trace);
-  }
-
-  // Resets the per-thread state of every known thread and empties the
-  // signaller map. For each thread: the recent-segments cache and the
-  // current SID are reset, running threads get a new segment with a
-  // singleton clock, all pending child-creation clocks and the exit clock
-  // (if any) are replaced by singletons, and the dead/fresh SID pools are
-  // emptied.
-  static void ForgetAllState() {
-    for (int idx = 0; idx < TSanThread::NumberOfThreads(); idx++) {
-      TSanThread *t = Get(TID(idx));
-      t->recent_segments_cache_.ForgetAllState();
-      // Reset the old SID so we don't try to read its VTS.
-      t->sid_ = SID();
-      VTS *fresh_clock = VTS::CreateSingleton(TID(idx), 2);
-      if (t->is_running()) {
-        t->NewSegmentWithoutUnrefingOld("ForgetAllState", fresh_clock);
-      }
-
-      map<TID, ThreadCreateInfo>::iterator pending =
-          t->child_tid_to_create_info_.begin();
-      while (pending != t->child_tid_to_create_info_.end()) {
-        ThreadCreateInfo &create_info = pending->second;
-        VTS::Unref(create_info.vts);
-        // The parent's VTS should neither happen-before nor equal the
-        // child's.
-        create_info.vts = VTS::CreateSingleton(TID(idx), 1);
-        ++pending;
-      }
-
-      if (t->vts_at_exit_) {
-        VTS::Unref(t->vts_at_exit_);
-        t->vts_at_exit_ = fresh_clock->Clone();
-      }
-      t->dead_sids_.clear();
-      t->fresh_sids_.clear();
-    }
-    signaller_map_->ClearAndDeleteElements();
-  }
-
-  // Allocates the class-wide state: a zeroed table of
-  // G_flags->max_n_threads thread pointers and an empty signaller map.
-  static void InitClassMembers() {
-    ScopedMallocCostCenter malloc_cc("InitClassMembers");
-    n_threads_ = 0;
-    all_threads_ = new TSanThread*[G_flags->max_n_threads];
-    memset(all_threads_, 0,
-           sizeof(all_threads_[0]) * G_flags->max_n_threads);
-    signaller_map_ = new SignallerMap;
-  }
-
-  // Returns the lock-era access set selected by |is_w| (used as an index
-  // into the two-element array).
-  BitSet *lock_era_access_set(int is_w) {
-    BitSet *selected = lock_era_access_set_ + is_w;
-    return selected;
-  }
-
-  // --------- dead SIDs, fresh SIDs
-  // When running fast path w/o a lock we need to recycle SIDs to a thread-local
-  // pool. HasRoomForDeadSids and AddDeadSid may be called w/o a lock.
-  // FlushDeadSids should be called under a lock.
-  // When creating a new segment on SBLOCK_ENTER, we need to get a fresh SID
-  // from somewhere. We keep a pile of fresh ready-to-use SIDs in
-  // a thread-local array.
-  enum { kMaxNumDeadSids = 64,       // limit used by HasRoomForDeadSids()
-         kMaxNumFreshSids = 256, };  // size fresh_sids_ is refilled to by GetSomeFreshSids()
-  // Drops one reference on |sid|. In serialized builds this is a plain
-  // Segment::Unref(). Otherwise the segment is not recycled right away: if
-  // its reference count reached zero, |sid| is queued in dead_sids_ for
-  // FlushDeadSids(). |where| is passed through to the unref call.
-  INLINE void AddDeadSid(SID sid, const char *where) {
-    if (!TS_SERIALIZED) {
-      const bool now_unreferenced = Segment::UnrefNoRecycle(sid, where) == 0;
-      if (now_unreferenced) {
-        dead_sids_.push_back(sid);
-      }
-      return;
-    }
-    Segment::Unref(sid, where);
-  }
-
-  // Recycles every SID queued by AddDeadSid() and empties the queue.
-  // No-op in serialized builds.
-  INLINE void FlushDeadSids() {
-    if (TS_SERIALIZED) return;
-    for (size_t i = 0, n_dead = dead_sids_.size(); i < n_dead; i++) {
-      const SID dead = dead_sids_[i];
-      Segment::AssertLive(dead, __LINE__);
-      DCHECK(Segment::Get(dead)->ref_count() == 0);
-      Segment::RecycleOneSid(dead);
-    }
-    dead_sids_.clear();
-  }
-
-  // Returns true while the dead-SID queue holds fewer than
-  // kMaxNumDeadSids - 2 entries. Always false in serialized builds.
-  INLINE bool HasRoomForDeadSids() const {
-    if (TS_SERIALIZED) {
-      return false;
-    }
-    return dead_sids_.size() < kMaxNumDeadSids - 2;
-  }
-
-  // Refills the thread-local pool of fresh SIDs up to kMaxNumFreshSids,
-  // unless the pool is still more than half full.
-  void GetSomeFreshSids() {
-    const size_t n_have = fresh_sids_.size();
-    DCHECK(n_have <= kMaxNumFreshSids);
-    if (n_have <= kMaxNumFreshSids / 2) {
-      DCHECK(fresh_sids_.capacity() >= kMaxNumFreshSids);
-      const size_t n_missing = kMaxNumFreshSids - n_have;
-      fresh_sids_.resize(kMaxNumFreshSids);
-      // The newly added tail slots are filled by the allocator.
-      Segment::AllocateFreshSegments(n_missing, &fresh_sids_[n_have]);
-    }
-    // Otherwise we already have quite a few fresh SIDs; nothing to do.
-  }
-
-  // Hands every unused fresh SID back through
-  // Segment::RecycleOneFreshSid() and empties the pool.
-  void ReleaseFreshSids() {
-    for (vector<SID>::iterator unused = fresh_sids_.begin();
-         unused != fresh_sids_.end(); ++unused) {
-      Segment::RecycleOneFreshSid(*unused);
-    }
-    fresh_sids_.clear();
-  }
-
- private:
-  bool is_running_;
-  string thread_name_;
-
-  TID    tid_;         // This thread's tid.
-  SID    sid_;         // Current segment ID.
-  TID    parent_tid_;  // Parent's tid.
-  bool   thread_local_copy_of_g_has_expensive_flags_;
-  uintptr_t  max_sp_;
-  uintptr_t  min_sp_;
-  uintptr_t  stack_size_for_ignore_;
-  uintptr_t  fun_r_ignore_;  // Nesting depth below an ignored routine; > 0 if we are inside a fun_r-ed function.
-  uintptr_t  min_sp_for_ignore_;
-  uintptr_t  n_mops_since_start_;
-  StackTrace *creation_context_;
-  bool      announced_;
-
-  LSID   rd_lockset_;  // HandleLock() adds both read and write locks here
-  LSID   wr_lockset_;  // HandleLock() adds only write locks here
-
-  // These bits should be read in the hottest loop, so we combine them all
-  // together.
-  // bit 1 -- ignore reads.
-  // bit 2 -- ignore writes.
-  // bit 3 -- have expensive flags
-  int expensive_bits_;
-  int ignore_depth_[2];
-  StackTrace *ignore_context_[2];
-
-  VTS *vts_at_exit_;  // when non-NULL, replaced by a singleton clone in ForgetAllState()
-
-  CallStack *call_stack_;  // back() is the innermost frame (see HandleRtnCall)
-
-  vector<SID> dead_sids_;   // filled by AddDeadSid(), drained by FlushDeadSids()
-  vector<SID> fresh_sids_;  // filled by GetSomeFreshSids(), consumed by HandleSblockEnter()
-
-  PtrToBoolCache<251> ignore_below_cache_;  // per-pc cache used by HandleRtnCall()
-
-  LockHistory lock_history_;
-  BitSet lock_era_access_set_[2];  // indexed by is_w; both cleared on every lock/unlock
-  RecentSegmentsCache recent_segments_cache_;
-
-  map<TID, ThreadCreateInfo> child_tid_to_create_info_;  // key TID(0): child tid not known yet
-
-  // This var is used to suppress race reports
-  // when handling atomic memory accesses.
-  // That is, an atomic memory access can't race with other accesses,
-  // however plain memory accesses can race with atomic memory accesses.
-  int inside_atomic_op_;
-
-  prng_t rand_state_;
-
-  struct Signaller {
-    VTS *vts;  // clock stored by HandleSignal(); tested for NULL there before first use
-  };
-
-  // Map from a signaller address to its Signaller, with a helper that
-  // releases every stored VTS before emptying the map.
-  class SignallerMap: public unordered_map<uintptr_t, Signaller> {
-    public:
-     // Unrefs the VTS of each entry, then removes all entries.
-     void ClearAndDeleteElements() {
-       iterator cur = begin();
-       while (cur != end()) {
-         VTS::Unref(cur->second.vts);
-         ++cur;
-       }
-       clear();
-     }
-  };
-
-  // All threads. The main thread has tid 0.
-  static TSanThread **all_threads_;  // G_flags->max_n_threads slots, allocated in InitClassMembers()
-  static int      n_threads_;
-
-  // signaller address -> VTS
-  static SignallerMap *signaller_map_;  // allocated in InitClassMembers()
-  static CyclicBarrierMap *cyclic_barrier_map_;  // allocated lazily by GetCyclicBarrierInfo()
-};
-
-// Convenience helper: the raw integer value of thread |t|'s tid.
-INLINE static int32_t raw_tid(TSanThread *t) {
-  TID id = t->tid();
-  return id.raw();
-}
-
-// Out-of-class definitions of the static data members of TSanThread.
-TSanThread                    **TSanThread::all_threads_;
-int                         TSanThread::n_threads_;
-TSanThread::SignallerMap       *TSanThread::signaller_map_;
-TSanThread::CyclicBarrierMap   *TSanThread::cyclic_barrier_map_;
-
-
-// -------- TsanAtomicCore ------------------ {{{1
-
-// Responsible for handling of atomic memory accesses.
-class TsanAtomicCore {
- public:
-  TsanAtomicCore();
-
-  void HandleWrite(TSanThread* thr,
-                   uintptr_t a,
-                   uint64_t v,
-                   uint64_t prev,
-                   bool is_acquire,
-                   bool is_release,
-                   bool is_rmw);
-
-  uint64_t HandleRead(TSanThread* thr,
-                      uintptr_t a,
-                      uint64_t v,
-                      bool is_acquire);
-
-  void ClearMemoryState(uintptr_t a, uintptr_t b);  // invoked by the file-level ClearMemoryState() with its own a and b
-
- private:
-  // Represents one value in the modification history
-  // of an atomic variable.
-  struct AtomicHistoryEntry {
-    // Actual value.
-    // (atomics of size more than uint64_t are not supported as of now)
-    uint64_t val;
-    // ID of a thread that did the modification.
-    TID tid;
-    // The thread's clock during the modification.
-    int32_t clk;
-    // Vector clock that is acquired by a thread
-    // that loads the value.
-    // Similar to Signaller::vts.
-    VTS* vts;
-  };
-
-  // Descriptor of an atomic variable.
-  struct Atomic {
-    // Number of stored entries in the modification order of the variable.
-    // This is a trade-off between space and modelling precision.
-    // 4 values should be generally enough.
-    static int32_t const kHistSize = 4;
-    // Current position in the modification order.
-    int32_t hist_pos;
-    // Modification history organized as a circular buffer.
-    // That is, old values are discarded.
-    AtomicHistoryEntry hist [kHistSize];
-    // It's basically a tid->hist_pos map that tracks which threads
-    // have seen which values. It's required to meet the following requirement:
-    // even relaxed loads must not be reordered in a single thread.
-    VectorClock last_seen;
-
-    Atomic();
-    void reset(bool init = false);
-  };
-
-  typedef map<uintptr_t, Atomic> AtomicMap;  // keyed by uintptr_t
-  AtomicMap atomic_map_;
-
-  void AtomicFixHist(Atomic* atomic,
-                     uint64_t prev);
-
-  TsanAtomicCore(TsanAtomicCore const&);  // private and declaration-only here: copying is not part of the interface
-  void operator=(TsanAtomicCore const&);  // private and declaration-only here: assignment is not part of the interface
-};
-
-
-static TsanAtomicCore* g_atomicCore;  // dereferenced without a NULL check by ClearMemoryState()
-
-
-// -------- Clear Memory State ------------------ {{{1
-// Calls Unref() on the value stored in |line| for every bit set in |mask|.
-// |a| and |b| are accepted but not used by the body.
-static void INLINE UnrefSegmentsInMemoryRange(uintptr_t a, uintptr_t b,
-                                                Mask mask, CacheLine *line) {
-  // |mask| is a by-value copy; bits are consumed one at a time.
-  for (; !mask.Empty(); ) {
-    const uintptr_t bit = mask.GetSomeSetBit();
-    DCHECK(mask.Get(bit));
-    mask.Clear(bit);
-    line->GetValuePointer(bit)->Unref("Detector::UnrefSegmentsInMemoryRange");
-  }
-}
-
-// Clears the state of the offsets [beg, end) in the cache line that covers
-// |addr|. Does nothing if that line does not currently exist.
-// Asserts that the TIL is held.
-void INLINE ClearMemoryStateInOneLine(TSanThread *thr, uintptr_t addr,
-                                      uintptr_t beg, uintptr_t end) {
-  AssertTILHeld();
-  CacheLine *cache_line = G_cache->GetLineIfExists(thr, addr, __LINE__);
-  if (!cache_line) {
-    return;
-  }
-  DCHECK(beg < CacheLine::kLineSize);
-  DCHECK(end <= CacheLine::kLineSize);
-  DCHECK(beg < end);
-
-  // Drop the 'published' attribute for the part of the range that has it.
-  Mask published_bits = cache_line->published();
-  if (UNLIKELY(!published_bits.Empty())) {
-    Mask in_range(published_bits.GetRange(beg, end));
-    ClearPublishedAttribute(cache_line, in_range);
-  }
-
-  // Clear the range and unref whatever was stored in it before.
-  Mask previously_used = cache_line->ClearRangeAndReturnOldUsed(beg, end);
-  UnrefSegmentsInMemoryRange(beg, end, previously_used, cache_line);
-  G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
-}
-
-// Clears the memory state for [a,b): the shadow state kept in the cache
-// lines covering the range and the atomic-variable state kept by
-// g_atomicCore. An empty range (a == b) is a no-op; a > b is a CHECK
-// failure.
-void NOINLINE ClearMemoryState(TSanThread *thr, uintptr_t a, uintptr_t b) {
-  if (a == b) return;
-  CHECK(a < b);
-  uintptr_t line1_tag = 0, line2_tag = 0;
-  uintptr_t single_line_tag = GetCacheLinesForRange(a, b,
-                                                    &line1_tag, &line2_tag);
-  if (single_line_tag) {
-    // The whole range lives in one cache line.
-    ClearMemoryStateInOneLine(thr, a, a - single_line_tag,
-                              b - single_line_tag);
-    // The atomic state has to be cleared on this path as well; returning
-    // without doing so would leave stale atomic history for small ranges.
-    g_atomicCore->ClearMemoryState(a, b);
-    return;
-  }
-
-  // Head: from |a| to the end of its cache line.
-  uintptr_t a_tag = CacheLine::ComputeTag(a);
-  ClearMemoryStateInOneLine(thr, a, a - a_tag, CacheLine::kLineSize);
-
-  // Body: lines in [line1_tag, line2_tag), cleared completely.
-  for (uintptr_t tag_i = line1_tag; tag_i < line2_tag;
-       tag_i += CacheLine::kLineSize) {
-    ClearMemoryStateInOneLine(thr, tag_i, 0, CacheLine::kLineSize);
-  }
-
-  // Tail: the part of the last line that is below |b|.
-  if (b > line2_tag) {
-    ClearMemoryStateInOneLine(thr, line2_tag, 0, b - line2_tag);
-  }
-
-  if (DEBUG_MODE && G_flags->debug_level >= 2) {
-    // Check that we've cleared it. Slow!
-    for (uintptr_t x = a; x < b; x++) {
-      uintptr_t off = CacheLine::ComputeOffset(x);
-      (void)off;
-      CacheLine *line = G_cache->GetLineOrCreateNew(thr, x, __LINE__);
-      CHECK(!line->has_shadow_value().Get(off));
-      G_cache->ReleaseLine(thr, x, line, __LINE__);
-    }
-  }
-
-  g_atomicCore->ClearMemoryState(a, b);
-}
-
-// -------- PCQ --------------------- {{{1
-struct PCQ {
-  uintptr_t pcq_addr;
-  deque<VTS*> putters;  // each VTS is unref'ed and replaced by a singleton in ForgetAllStateAndStartOver()
-};
-
-typedef map<uintptr_t, PCQ> PCQMap;
-static PCQMap *g_pcq_map;  // iterated (dereferenced) by ForgetAllStateAndStartOver()
-
-// -------- Heap info ---------------------- {{{1
-#include "ts_heap_info.h"
-// Information about heap memory.
-
-// Record kept per heap block in G_heap_map: ptr, size and a segment id.
-struct HeapInfo {
-  uintptr_t   ptr;
-  uintptr_t   size;
-  SID         sid;
-
-  // All fields start out as zero.
-  HeapInfo()
-      : ptr(0),
-        size(0),
-        sid(0) {
-  }
-
-  // Segment identified by |sid|.
-  Segment *seg() {
-    return Segment::Get(sid);
-  }
-
-  // Tid recorded in that segment.
-  TID tid() {
-    return seg()->tid();
-  }
-
-  // Textual stack trace of that segment.
-  string StackTraceString() {
-    return Segment::StackTraceString(sid);
-  }
-};
-
-static HeapMap<HeapInfo> *G_heap_map;
-
-// Record kept per entry in G_thread_stack_map: ptr and size only.
-struct ThreadStackInfo {
-  uintptr_t   ptr;
-  uintptr_t   size;
-
-  // Both fields start out as zero.
-  ThreadStackInfo()
-      : ptr(0),
-        size(0) {
-  }
-};
-
-static HeapMap<ThreadStackInfo> *G_thread_stack_map;
-
-// -------- Forget all state -------- {{{1
-// We need to forget all state and start over because we've
-// run out of some resources (most likely, segment IDs).
-// Drops all accumulated analysis state and starts over. |thr| is the
-// thread that triggered the flush and |reason| is printed in the INFO line.
-// Must be called with the TIL held. The cache is flushed last because that
-// step lets fast-path code in other threads run again.
-static void ForgetAllStateAndStartOver(TSanThread *thr, const char *reason) {
-  // This is done under the main lock.
-  AssertTILHeld();
-  size_t start_time = g_last_flush_time = TimeInMilliSeconds();
-  Report("T%d INFO: %s. Flushing state.\n", raw_tid(thr), reason);
-
-  if (TS_SERIALIZED == 0) {
-    // We own the lock, but we also must acquire all cache lines
-    // so that the fast-path (unlocked) code does not execute while
-    // we are flushing.
-    G_cache->AcquireAllLines(thr);
-  }
-
-
-  // Verbose notice, compiled in but permanently disabled.
-  if (0) {
-    Report("INFO: Thread Sanitizer will now forget all history.\n");
-    Report("INFO: This is experimental, and may fail!\n");
-    if (G_flags->keep_history > 0) {
-      Report("INFO: Consider re-running with --keep_history=0\n");
-    }
-    if (G_flags->show_stats) {
-        G_stats->PrintStats();
-    }
-  }
-
-  G_stats->n_forgets++;
-
-  Segment::ForgetAllState();
-  SegmentSet::ForgetAllState();
-  TSanThread::ForgetAllState();
-  VTS::FlushHBCache();
-
-  G_heap_map->Clear();
-
-  g_publish_info_map->clear();
-
-  // Every queued putter clock is replaced by a singleton clock.
-  for (PCQMap::iterator it = g_pcq_map->begin(); it != g_pcq_map->end(); ++it) {
-    PCQ &pcq = it->second;
-    for (deque<VTS*>::iterator it2 = pcq.putters.begin();
-         it2 != pcq.putters.end(); ++it2) {
-      VTS::Unref(*it2);
-      *it2 = VTS::CreateSingleton(TID(0), 1);
-    }
-  }
-
-  // Must be the last one to flush as it effectively releases the
-  // cache lines and enables fast path code to run in other threads.
-  G_cache->ForgetAllState(thr);
-
-  size_t stop_time = TimeInMilliSeconds();
-  if (DEBUG_MODE || (stop_time - start_time > 0)) {
-    // %ld expects a long; the size_t difference is converted explicitly so
-    // that the argument type matches the format on every platform.
-    Report("T%d INFO: Flush took %ld ms\n", raw_tid(thr),
-           static_cast<long>(stop_time - start_time));
-  }
-}
-
-static INLINE void FlushStateIfOutOfSegments(TSanThread *thr) {
-  if (Segment::NumberOfSegments() > kMaxSIDBeforeFlush) {
-    // too few sids left -- flush state.
-    if (DEBUG_MODE) {
-      G_cache->PrintStorageStats();
-      Segment::ShowSegmentStats();
-    }
-    ForgetAllStateAndStartOver(thr, "run out of segment IDs");
-  }
-}
-
-// -------- Expected Race ---------------------- {{{1
-typedef  HeapMap<ExpectedRace> ExpectedRacesMap;
-static ExpectedRacesMap *G_expected_races_map;
-static bool g_expecting_races;
-static int g_found_races_since_EXPECT_RACE_BEGIN;
-
-ExpectedRace* ThreadSanitizerFindExpectedRace(uintptr_t addr) {
-  return G_expected_races_map->GetInfo(addr);
-}
-
-// -------- Suppressions ----------------------- {{{1
-static const char default_suppressions[] =
-// TODO(kcc): as it gets bigger, move it into a separate object file.
-"# We need to have some default suppressions, but we don't want to    \n"
-"# keep them in a separate text file, so we keep the in the code.     \n"
-
-#ifdef VGO_darwin
-"{                                                                    \n"
-"   dyld tries to unlock an invalid mutex when adding/removing image. \n"
-"   ThreadSanitizer:InvalidLock                                       \n"
-"   fun:pthread_mutex_unlock                                          \n"
-"   fun:_dyld_register_func_for_*_image                               \n"
-"}                                                                    \n"
-
-"{                                                                      \n"
-"  Benign reports in __NSOperationInternal when using workqueue threads \n"
-"  ThreadSanitizer:Race                                                 \n"
-"  fun:__+[__NSOperationInternal _observeValueForKeyPath:ofObject:changeKind:oldValue:newValue:indexes:context:]_block_invoke_*\n"
-"  fun:_dispatch_call_block_and_release                                 \n"
-"}                                                                      \n"
-
-"{                                                                    \n"
-"  Benign race in GCD when using workqueue threads.                   \n"
-"  ThreadSanitizer:Race                                               \n"
-"  fun:____startOperations_block_invoke_*                             \n"
-"  ...                                                                \n"
-"  fun:_dispatch_call_block_and_release                               \n"
-"}                                                                    \n"
-
-"{                                                                    \n"
-"  Benign race in NSOQSchedule when using workqueue threads.          \n"
-"  ThreadSanitizer:Race                                               \n"
-"  fun:__doStart*                                                     \n"
-"  ...                                                                \n"
-"  fun:_dispatch_call_block_and_release                               \n"
-"}                                                                    \n"
-
-
-#endif
-
-#ifndef _MSC_VER
-"{                                                                   \n"
-"  False reports on std::string internals. See TSan issue #40.       \n"
-"  ThreadSanitizer:Race                                              \n"
-"  ...                                                               \n"
-"  fun:*~basic_string*                                               \n"
-"}                                                                   \n"
-
-"{                                                                   \n"
-"  False reports on std::string internals. See TSan issue #40.       \n"
-"  ThreadSanitizer:Race                                              \n"
-"  ...                                                               \n"
-"  fun:*basic_string*_M_destroy                                      \n"
-"}                                                                   \n"
-
-#else
-"{                                                                   \n"
-"  False lock report inside ntdll.dll                                \n"
-"  ThreadSanitizer:InvalidLock                                       \n"
-"  fun:*                                                             \n"
-"  obj:*ntdll.dll                                                    \n"
-"}                                                                   \n"
-
-"{                                                                   \n"
-"  False report due to lack of debug symbols in ntdll.dll  (a)       \n"
-"  ThreadSanitizer:InvalidLock                                       \n"
-"  fun:*SRWLock*                                                     \n"
-"}                                                                   \n"
-
-"{                                                                   \n"
-"  False report due to lack of debug symbols in ntdll.dll  (b)       \n"
-"  ThreadSanitizer:UnlockForeign                                     \n"
-"  fun:*SRWLock*                                                     \n"
-"}                                                                   \n"
-
-"{                                                                   \n"
-"  False report due to lack of debug symbols in ntdll.dll  (c)       \n"
-"  ThreadSanitizer:UnlockNonLocked                                   \n"
-"  fun:*SRWLock*                                                     \n"
-"}                                                                   \n"
-
-"{                                                                   \n"
-"  False reports on std::string internals (2). See TSan issue #40.   \n"
-"  ThreadSanitizer:Race                                              \n"
-"  ...                                                               \n"
-"  fun:*basic_string*scalar deleting destructor*                     \n"
-"}                                                                   \n"
-#endif
-
-#ifdef TS_PIN
-"{                                                                   \n"
-"  Suppression for issue 54 (PIN lacks support for IFUNC)            \n"
-"  ThreadSanitizer:Race                                              \n"
-"  ...                                                               \n"
-"  fun:*NegativeTests_Strlen::Worker*                                \n"
-"}                                                                   \n"
-#endif
-
-;
-
-// -------- Report Storage --------------------- {{{1
-class ReportStorage {
- public:
-
-  ReportStorage()
-   : n_reports(0),
-     n_race_reports(0),
-     program_finished_(0),
-     unwind_cb_(0) {
-    if (G_flags->generate_suppressions) {
-      Report("INFO: generate_suppressions = true\n");
-    }
-    // Read default suppressions
-    int n = suppressions_.ReadFromString(default_suppressions);
-    if (n == -1) {
-      Report("Error reading default suppressions at line %d: %s\n",
-          suppressions_.GetErrorLineNo(),
-          suppressions_.GetErrorString().c_str());
-      exit(1);
-    }
-
-    // Read user-supplied suppressions.
-    for (size_t i = 0; i < G_flags->suppressions.size(); i++) {
-      const string &supp_path = G_flags->suppressions[i];
-      Report("INFO: reading suppressions file %s\n", supp_path.c_str());
-      int n = suppressions_.ReadFromString(ReadFileToString(supp_path, true));
-      if (n == -1) {
-        Report("Error at line %d: %s\n",
-            suppressions_.GetErrorLineNo(),
-            suppressions_.GetErrorString().c_str());
-        exit(1);
-      }
-      Report("INFO: %6d suppression(s) read from file %s\n",
-             n, supp_path.c_str());
-    }
-  }
-
-  bool NOINLINE AddReport(TSanThread *thr, uintptr_t pc, bool is_w, uintptr_t addr,
-                          int size,
-                          ShadowValue old_sval, ShadowValue new_sval,
-                          bool is_published) {
-    {
-      // Check this isn't a "_ZNSs4_Rep20_S_empty_rep_storageE" report.
-      uintptr_t offset;
-      string symbol_descr;
-      if (GetNameAndOffsetOfGlobalObject(addr, &symbol_descr, &offset)) {
-        if (StringMatch("*empty_rep_storage*", symbol_descr))
-          return false;
-        if (StringMatch("_IO_stdfile_*_lock", symbol_descr))
-          return false;
-        if (StringMatch("_IO_*_stdout_", symbol_descr))
-          return false;
-        if (StringMatch("_IO_*_stderr_", symbol_descr))
-          return false;
-      }
-    }
-
-    bool is_expected = false;
-    ExpectedRace *expected_race = G_expected_races_map->GetInfo(addr);
-    if (debug_expected_races) {
-      Printf("Checking expected race for %lx; exp_race=%p\n",
-             addr, expected_race);
-      if (expected_race) {
-        Printf("  FOUND\n");
-      }
-    }
-
-    if (expected_race) {
-      if (G_flags->nacl_untrusted != expected_race->is_nacl_untrusted) {
-        Report("WARNING: this race is only expected in NaCl %strusted mode\n",
-            expected_race->is_nacl_untrusted ? "un" : "");
-      } else {
-        is_expected = true;
-        expected_race->count++;
-      }
-    }
-
-    if (g_expecting_races) {
-      is_expected = true;
-      g_found_races_since_EXPECT_RACE_BEGIN++;
-    }
-
-    if (is_expected && !G_flags->show_expected_races) return false;
-
-    StackTrace *stack_trace = thr->CreateStackTrace(pc);
-    if (unwind_cb_) {
-      int const maxcnt = 256;
-      uintptr_t cur_stack [maxcnt];
-      int cnt = unwind_cb_(cur_stack, maxcnt, pc);
-      if (cnt > 0 && cnt <= maxcnt) {
-        cnt = min<int>(cnt, stack_trace->capacity());
-        stack_trace->set_size(cnt);
-        for (int i = 0; i < cnt; i++)
-          stack_trace->Set(i, cur_stack[i]);
-      }
-    }
-    int n_reports_for_this_context = reported_stacks_[stack_trace]++;
-
-    if (n_reports_for_this_context > 0) {
-      // we already reported a race here.
-      StackTrace::Delete(stack_trace);
-      return false;
-    }
-
-
-    ThreadSanitizerDataRaceReport *race_report =
-        new ThreadSanitizerDataRaceReport;
-
-    race_report->type = ThreadSanitizerReport::DATA_RACE;
-    race_report->new_sval = new_sval;
-    race_report->old_sval = old_sval;
-    race_report->is_expected = is_expected;
-    race_report->last_access_is_w = is_w;
-    race_report->racey_addr = addr;
-    race_report->racey_addr_description = DescribeMemory(addr);
-    race_report->last_access_tid = thr->tid();
-    race_report->last_access_sid = thr->sid();
-    race_report->last_access_size = size;
-    race_report->stack_trace = stack_trace;
-    race_report->racey_addr_was_published = is_published;
-    race_report->last_acces_lsid[false] = thr->lsid(false);
-    race_report->last_acces_lsid[true] = thr->lsid(true);
-
-    Segment *seg = Segment::Get(thr->sid());
-    (void)seg;
-    CHECK(thr->lsid(false) == seg->lsid(false));
-    CHECK(thr->lsid(true) == seg->lsid(true));
-
-    return ThreadSanitizerPrintReport(race_report);
-  }
-
-  void AnnounceThreadsInSegmentSet(SSID ssid) {
-    if (ssid.IsEmpty()) return;
-    for (int s = 0; s < SegmentSet::Size(ssid); s++) {
-      Segment *seg = SegmentSet::GetSegmentForNonSingleton(ssid, s, __LINE__);
-      TSanThread::Get(seg->tid())->Announce();
-    }
-  }
-
-
-
-  void PrintConcurrentSegmentSet(SSID ssid, TID tid, SID sid,
-                                 LSID lsid, bool is_w,
-                                 const char *descr, set<LID> *locks,
-                                 set<SID>* concurrent_sids) {
-    if (ssid.IsEmpty()) return;
-    bool printed_header = false;
-    TSanThread *thr1 = TSanThread::Get(tid);
-    for (int s = 0; s < SegmentSet::Size(ssid); s++) {
-      SID concurrent_sid = SegmentSet::GetSID(ssid, s, __LINE__);
-      Segment *seg = Segment::Get(concurrent_sid);
-      if (Segment::HappensBeforeOrSameThread(concurrent_sid, sid)) continue;
-      if (!LockSet::IntersectionIsEmpty(lsid, seg->lsid(is_w))) continue;
-      if (concurrent_sids) {
-        concurrent_sids->insert(concurrent_sid);
-      }
-      TSanThread *thr2 = TSanThread::Get(seg->tid());
-      if (!printed_header) {
-        Report("  %sConcurrent %s happened at (OR AFTER) these points:%s\n",
-               c_magenta, descr, c_default);
-        printed_header = true;
-      }
-
-      Report("   %s (%s):\n",
-             thr2->ThreadName().c_str(),
-             TwoLockSetsToString(seg->lsid(false),
-                                 seg->lsid(true)).c_str());
-      if (G_flags->show_states) {
-        Report("   S%d\n", concurrent_sid.raw());
-      }
-      LockSet::AddLocksToSet(seg->lsid(false), locks);
-      LockSet::AddLocksToSet(seg->lsid(true), locks);
-      Report("%s", Segment::StackTraceString(concurrent_sid).c_str());
-      if (!G_flags->pure_happens_before &&
-          G_flags->suggest_happens_before_arcs) {
-        set<LID> message_locks;
-        // Report("Locks in T%d\n", thr1->tid().raw());
-        // thr1->lock_history().PrintLocks();
-        // Report("Unlocks in T%d\n", thr2->tid().raw());
-        // thr2->lock_history().PrintUnlocks();
-        if (LockHistory::Intersect(thr1->lock_history(), thr2->lock_history(),
-                                   seg->lock_era(), &message_locks)) {
-          Report("   Note: these locks were recently released by T%d"
-                 " and later acquired by T%d: {%s}\n"
-                 "   See http://code.google.com/p/data-race-test/wiki/"
-                 "PureHappensBeforeVsHybrid\n",
-                 thr2->tid().raw(),
-                 thr1->tid().raw(),
-                 SetOfLocksToString(message_locks).c_str());
-          locks->insert(message_locks.begin(), message_locks.end());
-        }
-      }
-    }
-  }
-
-  void SetProgramFinished() {
-    CHECK(!program_finished_);
-    program_finished_ = true;
-  }
-
-  string RaceInfoString(uintptr_t pc, set<SID>& concurrent_sids) {
-    string s;
-    char buf[100];
-    snprintf(buf, 100, "Race verifier data: %p", (void*)pc);
-    s += buf;
-    for (set<SID>::iterator it = concurrent_sids.begin();
-         it != concurrent_sids.end(); ++it) {
-      // Take the first pc of the concurrent stack trace.
-      uintptr_t concurrent_pc = *Segment::embedded_stack_trace(*it);
-      snprintf(buf, 100, ",%p", (void*)concurrent_pc);
-      s += buf;
-    }
-    s += "\n";
-    return s;
-  }
-
-  void PrintRaceReport(ThreadSanitizerDataRaceReport *race) {
-    bool short_report = program_finished_;
-    if (!short_report) {
-      AnnounceThreadsInSegmentSet(race->new_sval.rd_ssid());
-      AnnounceThreadsInSegmentSet(race->new_sval.wr_ssid());
-    }
-    bool is_w = race->last_access_is_w;
-    TID     tid = race->last_access_tid;
-    TSanThread *thr = TSanThread::Get(tid);
-    SID     sid = race->last_access_sid;
-    LSID    lsid = race->last_acces_lsid[is_w];
-    set<LID> all_locks;
-
-    n_race_reports++;
-    if (G_flags->html) {
-      Report("<b id=race%d>Race report #%d; </b>"
-             "<a href=\"#race%d\">Next;</a>  "
-             "<a href=\"#race%d\">Prev;</a>\n",
-             n_race_reports, n_race_reports,
-             n_race_reports+1, n_race_reports-1);
-    }
-
-
-    // Note the {{{ and }}}. These are for vim folds.
-    Report("%sWARNING: %s data race during %s of size %d at %p: {{{%s\n",
-           c_red,
-           race->is_expected ? "Expected" : "Possible",
-           is_w ? "write" : "read",
-           race->last_access_size,
-           race->racey_addr,
-           c_default);
-    if (!short_report) {
-      LockSet::AddLocksToSet(race->last_acces_lsid[false], &all_locks);
-      LockSet::AddLocksToSet(race->last_acces_lsid[true], &all_locks);
-      Report("   %s (%s):\n",
-             thr->ThreadName().c_str(),
-             TwoLockSetsToString(race->last_acces_lsid[false],
-                                 race->last_acces_lsid[true]).c_str());
-    }
-
-    CHECK(race->stack_trace);
-    Report("%s", race->stack_trace->ToString().c_str());
-    if (short_report) {
-      Report(" See the full version of this report above.\n");
-      Report("}%s\n", "}}");
-      return;
-    }
-    // Report(" sid=%d; vts=%s\n", thr->sid().raw(),
-    //       thr->vts()->ToString().c_str());
-    if (G_flags->show_states) {
-      Report(" old state: %s\n", race->old_sval.ToString().c_str());
-      Report(" new state: %s\n", race->new_sval.ToString().c_str());
-    }
-    set<SID> concurrent_sids;
-    if (G_flags->keep_history) {
-      PrintConcurrentSegmentSet(race->new_sval.wr_ssid(),
-                                tid, sid, lsid, true, "write(s)", &all_locks,
-                                &concurrent_sids);
-      if (is_w) {
-        PrintConcurrentSegmentSet(race->new_sval.rd_ssid(),
-                                  tid, sid, lsid, false, "read(s)", &all_locks,
-                                  &concurrent_sids);
-      }
-    } else {
-      Report("  %sAccess history is disabled. "
-             "Consider running with --keep-history=1 for better reports.%s\n",
-             c_cyan, c_default);
-    }
-
-    if (race->racey_addr_was_published) {
-      Report(" This memory was published\n");
-    }
-    if (race->racey_addr_description.size() > 0) {
-      Report("%s", race->racey_addr_description.c_str());
-    }
-    if (race->is_expected) {
-      ExpectedRace *expected_race =
-          G_expected_races_map->GetInfo(race->racey_addr);
-      if (expected_race) {
-        CHECK(expected_race->description);
-        Report(" Description: \"%s\"\n", expected_race->description);
-      }
-    }
-    set<LID>  locks_reported;
-
-    if (!all_locks.empty()) {
-      Report("  %sLocks involved in this report "
-             "(reporting last lock sites):%s {%s}\n",
-             c_green, c_default,
-             SetOfLocksToString(all_locks).c_str());
-
-      for (set<LID>::iterator it = all_locks.begin();
-           it != all_locks.end(); ++it) {
-        LID lid = *it;
-        Lock::ReportLockWithOrWithoutContext(lid, true);
-      }
-    }
-
-    string raceInfoString = RaceInfoString(race->stack_trace->Get(0),
-        concurrent_sids);
-    Report("   %s", raceInfoString.c_str());
-    Report("}}}\n");
-  }
-
-  bool PrintReport(ThreadSanitizerReport *report) {
-    CHECK(report);
-    // Check if we have a suppression.
-    vector<string> funcs_mangled;
-    vector<string> funcs_demangled;
-    vector<string> objects;
-
-    CHECK(!g_race_verifier_active);
-    CHECK(report->stack_trace);
-    CHECK(report->stack_trace->size());
-    for (size_t i = 0; i < report->stack_trace->size(); i++) {
-      uintptr_t pc = report->stack_trace->Get(i);
-      string img, rtn, file;
-      int line;
-      PcToStrings(pc, false, &img, &rtn, &file, &line);
-      if (rtn == "(below main)" || rtn == "ThreadSanitizerStartThread")
-        break;
-
-      funcs_mangled.push_back(rtn);
-      funcs_demangled.push_back(NormalizeFunctionName(PcToRtnName(pc, true)));
-      objects.push_back(img);
-
-      if (rtn == "main")
-        break;
-    }
-    string suppression_name;
-    if (suppressions_.StackTraceSuppressed("ThreadSanitizer",
-                                           report->ReportName(),
-                                           funcs_mangled,
-                                           funcs_demangled,
-                                           objects,
-                                           &suppression_name)) {
-      used_suppressions_[suppression_name]++;
-      return false;
-    }
-
-    // Actually print it.
-    if (report->type == ThreadSanitizerReport::UNLOCK_FOREIGN) {
-      ThreadSanitizerBadUnlockReport *bad_unlock =
-          reinterpret_cast<ThreadSanitizerBadUnlockReport*>(report);
-      Report("WARNING: Lock %s was released by thread T%d"
-             " which did not acquire this lock: {{{\n%s}}}\n",
-             Lock::ToString(bad_unlock->lid).c_str(),
-             bad_unlock->tid.raw(),
-             bad_unlock->stack_trace->ToString().c_str());
-    } else if (report->type == ThreadSanitizerReport::UNLOCK_NONLOCKED) {
-      ThreadSanitizerBadUnlockReport *bad_unlock =
-          reinterpret_cast<ThreadSanitizerBadUnlockReport*>(report);
-      Report("WARNING: Unlocking a non-locked lock %s in thread T%d: "
-             "{{{\n%s}}}\n",
-             Lock::ToString(bad_unlock->lid).c_str(),
-             bad_unlock->tid.raw(),
-             bad_unlock->stack_trace->ToString().c_str());
-    } else if (report->type == ThreadSanitizerReport::INVALID_LOCK) {
-      ThreadSanitizerInvalidLockReport *invalid_lock =
-          reinterpret_cast<ThreadSanitizerInvalidLockReport*>(report);
-      Report("WARNING: accessing an invalid lock %p in thread T%d: "
-             "{{{\n%s}}}\n",
-             invalid_lock->lock_addr,
-             invalid_lock->tid.raw(),
-             invalid_lock->stack_trace->ToString().c_str());
-    } else if (report->type == ThreadSanitizerReport::ATOMICITY_VIOLATION) {
-      ThreadSanitizerAtomicityViolationReport *av =
-          reinterpret_cast<ThreadSanitizerAtomicityViolationReport*>(report);
-      Report("WARNING: Suspected atomicity violation {{{\n");
-      av->r1->Print();
-      av->r2->Print();
-      av->r3->Print();
-      Report("}}}\n");
-
-    } else {
-      CHECK(report->type == ThreadSanitizerReport::DATA_RACE);
-      ThreadSanitizerDataRaceReport *race =
-          reinterpret_cast<ThreadSanitizerDataRaceReport*>(report);
-      PrintRaceReport(race);
-    }
-
-    n_reports++;
-    SetNumberOfFoundErrors(n_reports);
-    if (!G_flags->summary_file.empty()) {
-      char buff[100];
-      snprintf(buff, sizeof(buff),
-               "ThreadSanitizer: %d warning(s) reported\n", n_reports);
-      // We overwrite the contents of this file with the new summary.
-      // We don't do that at the end because even if we crash later
-      // we will already have the summary.
-      OpenFileWriteStringAndClose(G_flags->summary_file, buff);
-    }
-
-    // Generate a suppression.
-    if (G_flags->generate_suppressions) {
-      string supp = "{\n";
-      supp += "  <Put your suppression name here>\n";
-      supp += string("  ThreadSanitizer:") + report->ReportName() + "\n";
-      for (size_t i = 0; i < funcs_mangled.size(); i++) {
-        const string &func = funcs_demangled[i];
-        if (func.size() == 0 || func == "(no symbols") {
-          supp += "  obj:" + objects[i] + "\n";
-        } else {
-          supp += "  fun:" + funcs_demangled[i] + "\n";
-        }
-        if (StackTrace::CutStackBelowFunc(funcs_demangled[i])) {
-          break;
-        }
-      }
-      supp += "}";
-      Printf("------- suppression -------\n%s\n------- end suppression -------\n",
-             supp.c_str());
-    }
-
-    return true;
-  }
-
-  void PrintUsedSuppression() {
-    for (map<string, int>::iterator it = used_suppressions_.begin();
-         it != used_suppressions_.end(); ++it) {
-      Report("used_suppression: %d %s\n", it->second, it->first.c_str());
-    }
-  }
-
-  void PrintSummary() {
-    Report("ThreadSanitizer summary: reported %d warning(s) (%d race(s))\n",
-           n_reports, n_race_reports);
-  }
-
-
-  string DescribeMemory(uintptr_t a) {
-    const int kBufLen = 1023;
-    char buff[kBufLen+1];
-
-    // Is this stack?
-    for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
-      TSanThread *t = TSanThread::Get(TID(i));
-      if (!t || !t->is_running()) continue;
-      if (t->MemoryIsInStack(a)) {
-        snprintf(buff, sizeof(buff),
-                 "  %sLocation %p is %ld bytes inside T%d's stack [%p,%p]%s\n",
-                 c_blue,
-                 reinterpret_cast<void*>(a),
-                 static_cast<long>(t->max_sp() - a),
-                 i,
-                 reinterpret_cast<void*>(t->min_sp()),
-                 reinterpret_cast<void*>(t->max_sp()),
-                 c_default
-                );
-        return buff;
-      }
-    }
-
-    HeapInfo *heap_info = G_heap_map->GetInfo(a);
-    if (heap_info) {
-      snprintf(buff, sizeof(buff),
-             "  %sLocation %p is %ld bytes inside a block starting at %p"
-             " of size %ld allocated by T%d from heap:%s\n",
-             c_blue,
-             reinterpret_cast<void*>(a),
-             static_cast<long>(a - heap_info->ptr),
-             reinterpret_cast<void*>(heap_info->ptr),
-             static_cast<long>(heap_info->size),
-             heap_info->tid().raw(), c_default);
-      return string(buff) + heap_info->StackTraceString().c_str();
-    }
-
-
-    // Is it a global object?
-    uintptr_t offset;
-    string symbol_descr;
-    if (GetNameAndOffsetOfGlobalObject(a, &symbol_descr, &offset)) {
-      snprintf(buff, sizeof(buff),
-              "  %sAddress %p is %d bytes inside data symbol \"",
-              c_blue, reinterpret_cast<void*>(a), static_cast<int>(offset));
-      return buff + symbol_descr + "\"" + c_default + "\n";
-    }
-
-    if (G_flags->debug_level >= 2) {
-      string res;
-      // Is this near stack?
-      for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
-        TSanThread *t = TSanThread::Get(TID(i));
-        const uintptr_t kMaxStackDiff = 1024 * 16;
-        uintptr_t diff1 = a - t->max_sp();
-        uintptr_t diff2 = t->min_sp() - a;
-        if (diff1 < kMaxStackDiff ||
-            diff2 < kMaxStackDiff ||
-            t->MemoryIsInStack(a)) {
-          uintptr_t diff = t->MemoryIsInStack(a) ? 0 :
-              (diff1 < kMaxStackDiff ? diff1 : diff2);
-          snprintf(buff, sizeof(buff),
-                   "  %sLocation %p is within %d bytes outside T%d's stack [%p,%p]%s\n",
-                   c_blue,
-                   reinterpret_cast<void*>(a),
-                   static_cast<int>(diff),
-                   i,
-                   reinterpret_cast<void*>(t->min_sp()),
-                   reinterpret_cast<void*>(t->max_sp()),
-                   c_default
-                  );
-          res += buff;
-        }
-      }
-      if (res.size() > 0) {
-        return res +
-            "  This report _may_ indicate that valgrind incorrectly "
-            "computed the stack boundaries\n";
-      }
-    }
-
-    return "";
-  }
-
-  void SetUnwindCallback(ThreadSanitizerUnwindCallback cb) {
-    unwind_cb_ = cb;
-  }
-
- private:
-  map<StackTrace *, int, StackTrace::Less> reported_stacks_;
-  int n_reports;
-  int n_race_reports;
-  bool program_finished_;
-  Suppressions suppressions_;
-  map<string, int> used_suppressions_;
-  ThreadSanitizerUnwindCallback unwind_cb_;
-};
-
-// -------- Event Sampling ---------------- {{{1
-// This class samples (profiles) events.
-// Instances of this class should all be static.
-class EventSampler {
- public:
-
-  // Sample one event
-  void Sample(TSanThread *thr, const char *event_name, bool need_locking) {
-    CHECK_NE(G_flags->sample_events, 0);
-    (counter_)++;
-    if ((counter_ & ((1 << G_flags->sample_events) - 1)) != 0)
-      return;
-
-    TIL til(ts_lock, 8, need_locking);
-    string pos = thr->CallStackToStringRtnOnly(G_flags->sample_events_depth);
-    (*samples_)[event_name][pos]++;
-    total_samples_++;
-    if (total_samples_ >= print_after_this_number_of_samples_) {
-      print_after_this_number_of_samples_ +=
-          print_after_this_number_of_samples_ / 2;
-      ShowSamples();
-    }
-  }
-
-  // Show existing samples
-  static void ShowSamples() {
-    if (G_flags->sample_events == 0) return;
-    Printf("ShowSamples: (all samples: %lld)\n", total_samples_);
-    for (SampleMapMap::iterator it1 = samples_->begin();
-         it1 != samples_->end(); ++it1) {
-      string name = it1->first;
-      SampleMap &m = it1->second;
-      int total = 0;
-      for (SampleMap::iterator it2 = m.begin(); it2 != m.end(); it2++) {
-        total += it2->second;
-      }
-
-      map<int, string> reverted_map;
-      for (SampleMap::iterator it2 = m.begin(); it2 != m.end(); it2++) {
-        int n_samples = it2->second;
-        if (n_samples * 1000 < total) continue;
-        reverted_map[n_samples] = it2->first;
-      }
-      Printf("%s: total samples %'d (~%'lld events)\n", name.c_str(),
-             total,
-             (int64_t)total << G_flags->sample_events);
-      for (map<int, string>::iterator it = reverted_map.begin();
-           it != reverted_map.end(); ++it) {
-        Printf("%s: %d samples (~%d%%) %s\n", name.c_str(), it->first,
-               (it->first * 100) / total, it->second.c_str());
-      }
-      Printf("\n");
-    }
-  }
-
-  static void InitClassMembers() {
-    samples_ = new SampleMapMap;
-    total_samples_ = 0;
-    print_after_this_number_of_samples_ = 1000;
-  }
-
- private:
-  int counter_;
-
-  typedef map<string, int> SampleMap;
-  typedef map<string, SampleMap> SampleMapMap;
-  static SampleMapMap *samples_;
-  static int64_t total_samples_;
-  static int64_t print_after_this_number_of_samples_;
-};
-
-EventSampler::SampleMapMap *EventSampler::samples_;
-int64_t EventSampler::total_samples_;
-int64_t EventSampler::print_after_this_number_of_samples_;
-
-// -------- Detector ---------------------- {{{1
-// Collection of event handlers.
-class Detector {
- public:
-  void INLINE HandleTraceLoop(TSanThread *thr, uintptr_t pc,
-                              MopInfo *mops,
-                              uintptr_t *tleb, size_t n,
-                              int expensive_bits, bool need_locking) {
-    bool has_expensive_flags = (expensive_bits & 4) != 0;
-    size_t i = 0;
-    uintptr_t sblock_pc = pc;
-    size_t n_locks = 0;
-    do {
-      uintptr_t addr = tleb[i];
-      if (addr == 0) continue;  // This mop was not executed.
-      MopInfo *mop = &mops[i];
-      tleb[i] = 0;  // we've consumed this mop, clear it.
-      DCHECK(mop->size() != 0);
-      DCHECK(mop->pc() != 0);
-      if ((expensive_bits & 1) && mop->is_write() == false) continue;
-      if ((expensive_bits & 2) && mop->is_write() == true) continue;
-      n_locks += HandleMemoryAccessInternal(thr, &sblock_pc, addr, mop,
-                                 has_expensive_flags,
-                                 need_locking);
-    } while (++i < n);
-    if (has_expensive_flags) {
-      const size_t mop_stat_size = TS_ARRAY_SIZE(thr->stats.mops_per_trace);
-      thr->stats.mops_per_trace[min(n, mop_stat_size - 1)]++;
-      const size_t stat_size = TS_ARRAY_SIZE(thr->stats.locks_per_trace);
-      thr->stats.locks_per_trace[min(n_locks, stat_size - 1)]++;
-    }
-  }
-
-#ifdef _MSC_VER
-  // MSVC compiles insanely slowly when this is INLINE, so keep it NOINLINE.
-  NOINLINE
-#else
-  INLINE
-#endif
-  void HandleTrace(TSanThread *thr, MopInfo *mops, size_t n, uintptr_t pc,
-                   uintptr_t *tleb, bool need_locking) {
-    DCHECK(n);
-    // Bit 0 - ignore reads, bit 1 - ignore writes, bit 2 - expensive flags.
-    const int flags = thr->expensive_bits();
-    const bool ignore_everything = (flags & 3) == 3;
-
-    if (flags == 0) {
-      // Common case: the loop is invoked with a literal zero.
-      HandleTraceLoop(thr, pc, mops, tleb, n, 0, need_locking);
-    } else if (ignore_everything) {
-      // Reads and writes are both ignored, only the tleb has to be reset.
-      size_t slot = 0;
-      while (slot < n) {
-        tleb[slot] = 0;
-        slot++;
-      }
-    } else {
-      HandleTraceLoop(thr, pc, mops, tleb, n, flags, need_locking);
-    }
-
-    // Whichever path was taken, no tleb slot may remain set.
-    for (size_t slot = 0; slot < n; slot++) DCHECK(tleb[slot] == 0);
-  }
-
-  // Handles a single access by presenting it to HandleTrace() as a trace that
-  // consists of one mop and has no sblock.
-  void INLINE HandleMemoryAccess(TSanThread *thr, uintptr_t pc,
-                                 uintptr_t addr, uintptr_t size,
-                                 bool is_w, bool need_locking) {
-    CHECK(size);
-    const uintptr_t no_sblock_pc = 0;
-    MopInfo single_mop(pc, size, is_w, false);
-    // The address itself serves as the one-slot tleb.
-    HandleTrace(thr, &single_mop, 1, no_sblock_pc, &addr, need_locking);
-  }
-
-  void ShowUnfreedHeap() {
-    // Debugging aid for the free() interceptors (not a leak detector): prints
-    // every block that is still recorded in the heap map.
-    if (!(DEBUG_MODE && G_flags->debug_level >= 1)) return;
-
-    HeapMap<HeapInfo>::iterator cur = G_heap_map->begin();
-    while (cur != G_heap_map->end()) {
-      HeapInfo &block = cur->second;
-      Printf("Not free()-ed memory: %p [%p, %p)\n%s\n",
-             block.size, block.ptr, block.ptr + block.size,
-             block.StackTraceString().c_str());
-      ++cur;
-    }
-  }
-
-  void FlushExpectedRaces(bool print_summary) {
-    // For unit tests only: checks that every expected race was found, counts
-    // each missing one as an error and then empties the expected-races map.
-    static int total_missing = 0;  // Accumulated over all flushes.
-    int missing_now = 0;
-
-    ExpectedRacesMap::iterator it = G_expected_races_map->begin();
-    for (; it != G_expected_races_map->end(); ++it) {
-      ExpectedRace race = it->second;
-      if (debug_expected_races) {
-        Printf("Checking if expected race fired: %p\n", race.ptr);
-      }
-      // The race was reported at least once.
-      if (race.count != 0) continue;
-      // Unverifiable races are not demanded while the race verifier is active.
-      if (g_race_verifier_active && !race.is_verifiable) continue;
-      // The race's NaCl-untrusted mark has to agree with the flag.
-      if (G_flags->nacl_untrusted != race.is_nacl_untrusted) continue;
-
-      ++missing_now;
-      Printf("Missing an expected race on %p: %s (annotated at %s)\n",
-             it->first,
-             race.description,
-             PcToRtnNameAndFilePos(race.pc).c_str());
-    }
-
-    if (missing_now != 0) {
-      const int errors_so_far = GetNumberOfFoundErrors();
-      SetNumberOfFoundErrors(errors_so_far + missing_now);
-      total_missing += missing_now;
-    }
-    G_expected_races_map->Clear();
-
-    if (!print_summary || total_missing <= 0) return;
-    Report("WARNING: %d expected race(s) NOT detected!\n", total_missing);
-  }
-
-  void HandleProgramEnd() {  // Emits the end-of-run output, in this order.
-    FlushExpectedRaces(true);  // true: also print the missing-races summary.
-    // ShowUnfreedHeap();  (disabled)
-    EventSampler::ShowSamples();
-    ShowStats();  // Prints only if the show_stats flag is set.
-    TraceInfo::PrintTraceProfile();
-    ShowProcSelfStatus();  // Prints only if show_proc_self_status is set.
-    reports_.PrintUsedSuppression();
-    reports_.PrintSummary();
-    // Report("ThreadSanitizerValgrind: exiting\n");  (disabled)
-  }
-
-  // Drops all state once the VM size exceeds a soft limit. The soft limit
-  // starts at 13/16 of max_mem_in_mb and is raised after every flush.
-  void FlushIfOutOfMem(TSanThread *thr) {
-    static int peak_vm_mb;          // Largest VM size observed so far.
-    static int flush_threshold_mb;  // Zero until it is initialized below.
-
-    const int hard_limit = G_flags->max_mem_in_mb;
-    CHECK(hard_limit > 0);
-    const int report_threshold_mb = (hard_limit * 12) / 16;
-    const int initial_threshold_mb = (hard_limit * 13) / 16;
-
-    const int current_vm_mb = GetVmSizeInMb();
-    if (current_vm_mb > peak_vm_mb) {
-      // A new peak; mention it once it is above 12/16 of the hard limit.
-      peak_vm_mb = current_vm_mb;
-      if (peak_vm_mb > report_threshold_mb) {
-        Report("INFO: ThreadSanitizer's VmSize: %dM\n", (int)peak_vm_mb);
-      }
-    }
-
-    if (flush_threshold_mb == 0) {
-      flush_threshold_mb = initial_threshold_mb;
-    }
-
-    if (current_vm_mb <= flush_threshold_mb) return;
-
-    ForgetAllStateAndStartOver(thr,
-        "ThreadSanitizer is running close to its memory limit");
-    // The next flush happens only after the VM size grows past this point.
-    flush_threshold_mb = current_vm_mb + 1;
-  }
-
-  // Flushes all state unconditionally, on behalf of thread tid.
-  void FlushState(TID tid) {
-    TSanThread *requester = TSanThread::Get(tid);
-    ForgetAllStateAndStartOver(requester,
-                               "State flushing requested by client");
-  }
-
-  void FlushIfNeeded(TSanThread *thr) {
-    // Occasionally checks the memory limit; has a body only under TS_VALGRIND.
-#ifdef TS_VALGRIND  // GetVmSizeInMb() works only with valgrind anyway.
-    static int counter;
-    counter++;  // ATTENTION: don't do this in multi-threaded code -- too slow.
-    CHECK(TS_SERIALIZED == 1);
-
-    // Are we out of memory? Checked only if max_mem_in_mb is positive.
-    if (G_flags->max_mem_in_mb > 0) {
-      const int kFreq = 1014 * 32;  // NOTE(review): 1014 may be a typo for 1024.
-      if ((counter % kFreq) == 0) {  // Don't do it too often.
-        // TODO(kcc): find a way to check memory limit more frequently.
-        TIL til(ts_lock, 7);
-        AssertTILHeld();
-        FlushIfOutOfMem(thr);
-      }
-    }
-#if 0
-    if ((counter % (1024 * 1024 * 64)) == 0 ||
-        counter == (1024 * 1024)) {
-      // ShowStats();
-      EventSampler::ShowSamples();
-      TraceInfo::PrintTraceProfile();
-    }
-#endif
-#endif
-
-#if 0  // Disabled periodic flush; do we still need it? Hope not.
-    size_t flush_period = G_flags->flush_period * 1000;  // milliseconds.
-    if (flush_period && (counter % (1024 * 4)) == 0) {
-      size_t cur_time = TimeInMilliSeconds();
-      if (cur_time - g_last_flush_time  > flush_period) {
-        TIL til(ts_lock, 7);
-        ForgetAllStateAndStartOver(
-          "Doing periodic flush (period is set by --flush_period=n_seconds)");
-      }
-    }
-#endif
-  }
-
-  // RTN_CALL
-  // Forwards the call to the thread object, then lets FlushIfNeeded() decide
-  // whether the state has to be flushed.
-  void HandleRtnCall(TID tid, uintptr_t call_pc, uintptr_t target_pc,
-                     IGNORE_BELOW_RTN ignore_below) {
-    TSanThread *caller = TSanThread::Get(tid);
-    caller->HandleRtnCall(call_pc, target_pc, ignore_below);
-    FlushIfNeeded(caller);
-  }
-
-  // Dispatches one event to its handler.
-  // READ, WRITE, RTN_CALL and RTN_EXIT are dispatched before this function
-  // acquires ts_lock; every other event type is handled while holding it.
-  // An event type without a handler fails a CHECK.
-  void INLINE HandleOneEvent(Event *e) {
-    ScopedMallocCostCenter malloc_cc("HandleOneEvent");
-
-    DCHECK(e);
-    EventType type = e->type();
-    DCHECK(type != NOOP);
-    TSanThread *thr = NULL;
-    // For THR_START the thread object is not looked up here, thr stays NULL
-    // until HandleThreadStart() has run.
-    if (type != THR_START) {
-      thr = TSanThread::Get(TID(e->tid()));
-      DCHECK(thr);
-      thr->SetTopPc(e->pc());
-      thr->stats.events[type]++;
-    }
-
-    // Events dispatched before ts_lock is taken here.
-    switch (type) {
-      case READ:
-        HandleMemoryAccess(thr, e->pc(), e->a(), e->info(), false, true);
-        return;
-      case WRITE:
-        HandleMemoryAccess(thr, e->pc(), e->a(), e->info(), true, true);
-        return;
-      case RTN_CALL:
-        HandleRtnCall(TID(e->tid()), e->pc(), e->a(),
-                      IGNORE_BELOW_RTN_UNKNOWN);
-        return;
-      case RTN_EXIT:
-        thr->HandleRtnExit();
-        return;
-      default: break;
-    }
-
-    // Everything else is under a lock.
-    TIL til(ts_lock, 0);
-    AssertTILHeld();
-
-
-    if (UNLIKELY(type == THR_START)) {
-        HandleThreadStart(TID(e->tid()), TID(e->info()), (CallStack*)e->pc());
-        // The per-type event counter could not be bumped above (thr was NULL).
-        TSanThread::Get(TID(e->tid()))->stats.events[type]++;
-        return;
-    }
-
-    FlushStateIfOutOfSegments(thr);
-
-    // Since we have the lock, get some fresh SIDs.
-    thr->GetSomeFreshSids();
-
-    switch (type) {
-      // THR_START has already returned above.
-      case THR_START   : CHECK(0); break;
-      case SBLOCK_ENTER:
-        if (thr->ignore_reads() && thr->ignore_writes()) break;
-        thr->HandleSblockEnter(e->pc(), /*allow_slow_path=*/true);
-        break;
-      case THR_CREATE_BEFORE:
-        thr->HandleThreadCreateBefore(TID(e->tid()), e->pc());
-        break;
-      case THR_CREATE_AFTER:
-        thr->HandleThreadCreateAfter(TID(e->tid()), TID(e->info()));
-        break;
-      case THR_FIRST_INSN:
-        HandleThreadFirstInsn(TID(e->tid()));
-        break;
-      case THR_JOIN_AFTER     : HandleThreadJoinAfter(e);   break;
-      case THR_STACK_TOP      : HandleThreadStackTop(e); break;
-
-      case THR_END     : HandleThreadEnd(TID(e->tid()));     break;
-      case MALLOC      : HandleMalloc(e, false);     break;
-      case FREE        : HandleFree(e);         break;
-      case MMAP        : HandleMalloc(e, true);      break;  // same as MALLOC
-      case MUNMAP      : HandleMunmap(e);     break;
-
-
-      case WRITER_LOCK : thr->HandleLock(e->a(), true);     break;
-      case READER_LOCK : thr->HandleLock(e->a(), false);    break;
-      case UNLOCK      : thr->HandleUnlock(e->a());       break;
-      case UNLOCK_OR_INIT : HandleUnlockOrInit(e); break;
-
-      case LOCK_CREATE:
-      case LOCK_DESTROY: HandleLockCreateOrDestroy(e); break;
-
-      case SIGNAL      : thr->HandleSignal(e->a());  break;
-      case WAIT        : thr->HandleWait(e->a());   break;
-
-      case CYCLIC_BARRIER_INIT:
-        thr->HandleBarrierInit(e->a(), e->info());
-        break;
-      case CYCLIC_BARRIER_WAIT_BEFORE  :
-        thr->HandleBarrierWaitBefore(e->a());
-        break;
-      case CYCLIC_BARRIER_WAIT_AFTER  :
-        thr->HandleBarrierWaitAfter(e->a());
-        break;
-
-      case PCQ_CREATE   : HandlePcqCreate(e);   break;
-      case PCQ_DESTROY  : HandlePcqDestroy(e);  break;
-      case PCQ_PUT      : HandlePcqPut(e);      break;
-      case PCQ_GET      : HandlePcqGet(e);      break;
-
-
-      case EXPECT_RACE :
-        // For this event pc() carries the description string.
-        HandleExpectRace(e->a(), (const char*)e->pc(), TID(e->tid()));
-        break;
-      case BENIGN_RACE :
-        HandleBenignRace(e->a(), e->info(),
-                         (const char*)e->pc(), TID(e->tid()));
-        break;
-      case FLUSH_EXPECTED_RACES:
-        FlushExpectedRaces(false);
-        break;
-      case EXPECT_RACE_BEGIN:
-        CHECK(g_expecting_races == false);
-        g_expecting_races = true;
-        g_found_races_since_EXPECT_RACE_BEGIN = 0;
-        break;
-      case EXPECT_RACE_END:
-        CHECK(g_expecting_races == true);
-        g_expecting_races = false;
-        // No race between BEGIN and END counts as one error.
-        if (g_found_races_since_EXPECT_RACE_BEGIN == 0) {
-          int n_errs = GetNumberOfFoundErrors();
-          SetNumberOfFoundErrors(n_errs + 1);
-          Printf("WARNING: expected race not found.\n");
-        }
-        break;
-
-      case HB_LOCK     : HandleHBLock(e);       break;
-      case NON_HB_LOCK : HandleNonHBLock(e);    break;
-
-      case IGNORE_READS_BEG:  HandleIgnore(e, false, true);  break;
-      case IGNORE_READS_END:  HandleIgnore(e, false, false); break;
-      case IGNORE_WRITES_BEG: HandleIgnore(e, true, true);   break;
-      case IGNORE_WRITES_END: HandleIgnore(e, true, false);  break;
-
-      case SET_THREAD_NAME:
-        thr->set_thread_name((const char*)e->a());
-        break;
-      case SET_LOCK_NAME: {
-          uintptr_t lock_addr = e->a();
-          const char *name = reinterpret_cast<const char *>(e->info());
-          Lock *lock = Lock::LookupOrCreate(lock_addr);
-          lock->set_name(name);
-        }
-        break;
-
-      case PUBLISH_RANGE : HandlePublishRange(e); break;
-      case UNPUBLISH_RANGE :
-        Report("WARNING: ANNOTATE_UNPUBLISH_MEMORY_RANGE is deprecated\n");
-        break;
-
-      case TRACE_MEM   : HandleTraceMem(e);   break;
-      case STACK_TRACE : HandleStackTrace(e); break;
-      case NOOP        : CHECK(0);           break;  // can't happen.
-      case VERBOSITY   : e->Print(); G_flags->verbosity = e->info(); break;
-      case FLUSH_STATE : FlushState(TID(e->tid()));       break;
-      default                 : CHECK(0);    break;
-    }
-  }
-
- private:
-  // Prints the contents of /proc/self/status if the show_proc_self_status
-  // flag is set and the file could be read as a non-empty string.
-  void ShowProcSelfStatus() {
-    if (!G_flags->show_proc_self_status) return;
-    const string status = ReadFileToString("/proc/self/status", false);
-    if (status.empty()) return;
-    Printf("%s", status.c_str());
-  }
-
-  // Prints the global statistics and the cache storage statistics if the
-  // show_stats flag is set.
-  void ShowStats() {
-    if (!G_flags->show_stats) return;
-    G_stats->PrintStats();
-    G_cache->PrintStorageStats();
-  }
-
-  // PCQ_CREATE, PCQ_DESTROY, PCQ_PUT, PCQ_GET
-  // PCQ_CREATE: registers a new queue; its address must not be known yet.
-  void HandlePcqCreate(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-
-    const uintptr_t queue_addr = e->a();
-    PCQ fresh_queue;
-    fresh_queue.pcq_addr = queue_addr;
-    CHECK(!g_pcq_map->count(queue_addr));
-    (*g_pcq_map)[queue_addr] = fresh_queue;
-  }
-  // PCQ_DESTROY: forgets a queue; its address must be known.
-  void HandlePcqDestroy(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-
-    const uintptr_t queue_addr = e->a();
-    CHECK(g_pcq_map->count(queue_addr));
-    g_pcq_map->erase(queue_addr);
-  }
-  // PCQ_PUT: appends a clone of the putting thread's current VTS to the
-  // queue and starts a new segment for that thread.
-  void HandlePcqPut(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-
-    const uintptr_t queue_addr = e->a();
-    PCQ &queue = (*g_pcq_map)[queue_addr];
-    CHECK(queue.pcq_addr == queue_addr);
-
-    TSanThread *producer = TSanThread::Get(TID(e->tid()));
-    VTS *snapshot = producer->segment()->vts()->Clone();
-    queue.putters.push_back(snapshot);
-    producer->NewSegmentForSignal();
-  }
-  // PCQ_GET: takes the oldest stored VTS off the queue, makes the getting
-  // thread wait on it and releases the reference to it.
-  void HandlePcqGet(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-
-    const uintptr_t queue_addr = e->a();
-    PCQ &queue = (*g_pcq_map)[queue_addr];
-    CHECK(queue.pcq_addr == queue_addr);
-    CHECK(!queue.putters.empty());
-
-    VTS *oldest_put = queue.putters.front();
-    queue.putters.pop_front();
-    CHECK(oldest_put);
-
-    TSanThread *consumer = TSanThread::Get(TID(e->tid()));
-    consumer->NewSegmentForWait(oldest_put);
-    VTS::Unref(oldest_put);
-  }
-
-  // PUBLISH_RANGE
-  // Publishes [a, a + info) with the current VTS of the calling thread and
-  // starts a new segment for it. The deprecation warning is printed for the
-  // first 19 calls only.
-  void HandlePublishRange(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-
-    static int times_called;
-    if (++times_called < 20) {
-      Report("WARNING: ANNOTATE_PUBLISH_MEMORY_RANGE is deprecated and will not"
-             " be supported in future versions of ThreadSanitizer.\n");
-    }
-
-    const uintptr_t range_begin = e->a();
-    const uintptr_t range_size = e->info();
-
-    TSanThread *publisher = TSanThread::Get(TID(e->tid()));
-    VTS *publisher_vts = publisher->segment()->vts();
-    PublishRange(publisher, range_begin, range_begin + range_size,
-                 publisher_vts);
-
-    publisher->NewSegmentForSignal();
-  }
-
-  // IGNORE_{READS,WRITES}_{BEG,END}
-  // Passes is_w and on through to set_ignore_accesses() of the event's thread.
-  void HandleIgnore(Event *e, bool is_w, bool on) {
-    if (G_flags->verbosity >= 2) e->Print();
-    TSanThread *target = TSanThread::Get(TID(e->tid()));
-    target->set_ignore_accesses(is_w, on);
-  }
-
-  // BENIGN_RACE
-  // Sets the 'racey' bit of every byte in [ptr, ptr + size).
-  void HandleBenignRace(uintptr_t ptr, uintptr_t size,
-                        const char *descr, TID tid) {
-    TSanThread *thr = TSanThread::Get(tid);
-    if (debug_benign_races) {
-      Printf("T%d: BENIGN_RACE: ptr=%p size=%ld descr='%s'\n",
-             tid.raw(), ptr, size, descr);
-    }
-    uintptr_t cur = ptr;
-    while (cur < ptr + size) {
-      // Fetch the cache line of this byte, mark the byte, give the line back.
-      CacheLine *line = G_cache->GetLineOrCreateNew(thr, cur, __LINE__);
-      CHECK(line);
-      const uintptr_t offset_in_line = CacheLine::ComputeOffset(cur);
-      line->racey().Set(offset_in_line);
-      G_cache->ReleaseLine(thr, cur, line, __LINE__);
-      cur++;
-    }
-  }
-
-  // EXPECT_RACE
-  // Records an expected race of size 1 at ptr and clears the 'racey' bit of
-  // that address if its cache line exists. descr must not be NULL.
-  void HandleExpectRace(uintptr_t ptr, const char *descr, TID tid) {
-    // Both marks default to true and are derived from descr when it is given.
-    bool verifiable = true;
-    bool nacl_untrusted = true;
-    if (descr) {
-      verifiable = string(descr).find("UNVERIFIABLE") == string::npos;
-      nacl_untrusted = string(descr).find("NACL_UNTRUSTED") != string::npos;
-    }
-
-    ExpectedRace expected;
-    expected.ptr = ptr;
-    expected.size = 1;
-    expected.count = 0;
-    expected.is_verifiable = verifiable;
-    expected.is_nacl_untrusted = nacl_untrusted;
-
-    // Keep a private copy of the description (strdup may be unavailable).
-    CHECK(descr);
-    const size_t len = strlen(descr);
-    char *descr_copy = new char [len + 1];
-    memcpy(descr_copy, descr, len);
-    descr_copy[len] = 0;
-    expected.description = descr_copy;
-
-    TSanThread *annotator = TSanThread::Get(tid);
-    expected.pc = annotator->GetCallstackEntry(1);
-    G_expected_races_map->InsertInfo(ptr, expected);
-
-    // Reset the 'racey' flag of the address.
-    CacheLine *line = G_cache->GetLineIfExists(annotator, ptr, __LINE__);
-    if (line != NULL) {
-      const uintptr_t off = CacheLine::ComputeOffset(ptr);
-      line->racey().ClearRange(off, off + 1);
-      G_cache->ReleaseLine(annotator, ptr, line, __LINE__);
-    }
-
-    if (!debug_expected_races) return;
-
-    // Debug output: this annotation followed by all registered races.
-    Printf("T%d: EXPECT_RACE: ptr=%p descr='%s'\n", tid.raw(), ptr, descr);
-    annotator->ReportStackTrace(ptr);
-    int index = 0;
-    ExpectedRacesMap::iterator it = G_expected_races_map->begin();
-    for (; it != G_expected_races_map->end(); ++it, ++index) {
-      ExpectedRace &entry = it->second;
-      Printf("  [%d] %p [0x%lx]\n", index, &entry, entry.ptr);
-    }
-  }
-
-  // STACK_TRACE
-  // Prints the event and then the stack trace of the event's thread.
-  void HandleStackTrace(Event *e) {
-    TSanThread *reporter = TSanThread::Get(TID(e->tid()));
-    e->Print();
-    reporter->ReportStackTrace();
-  }
-
-  // HB_LOCK
-  // Marks the lock at e->a() (created on demand) as pure happens-before.
-  void HandleHBLock(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-    Lock *target = Lock::LookupOrCreate(e->a());
-    CHECK(target);
-    target->set_is_pure_happens_before(true);
-  }
-
-  // NON_HB_LOCK
-  // Marks the lock at e->a() (created on demand) as not pure happens-before.
-  void HandleNonHBLock(Event *e) {
-    if (G_flags->verbosity >= 2) e->Print();
-    Lock *target = Lock::LookupOrCreate(e->a());
-    CHECK(target);
-    target->set_is_pure_happens_before(false);
-  }
-
-  // UNLOCK_OR_INIT
-  // A hack for posix pthread_spin_unlock, which is sometimes the same symbol
-  // as pthread_spin_init. The event is treated as an unlock only if the lock
-  // is known and currently write-held; otherwise it is treated as an init.
-  // TODO(kcc): is there a way to distinguish pthread_spin_init
-  // and pthread_spin_unlock?
-  void HandleUnlockOrInit(Event *e) {
-    TSanThread *caller = TSanThread::Get(TID(e->tid()));
-    if (G_flags->verbosity >= 2) {
-      e->Print();
-      caller->ReportStackTrace();
-    }
-    const uintptr_t addr = e->a();
-    Lock *known = Lock::Lookup(addr);
-    const bool known_and_locked = known && known->wr_held();
-    if (!known_and_locked) {
-      // The lock was never seen or is currently unlocked: initialize it.
-      Lock::Create(addr);
-      return;
-    }
-    // A known lock that is held: a plain unlock.
-    caller->HandleUnlock(addr);
-  }
-
-  // LOCK_CREATE, LOCK_DESTROY
-  // Creates the lock at e->a(), or destroys it. Destroying an unknown lock
-  // produces an INVALID_LOCK report instead.
-  void HandleLockCreateOrDestroy(Event *e) {
-    TSanThread *caller = TSanThread::Get(TID(e->tid()));
-    const uintptr_t addr = e->a();
-    if (debug_lock) e->Print();
-
-    if (e->type() == LOCK_CREATE) {
-      Lock::Create(addr);
-      return;
-    }
-    CHECK(e->type() == LOCK_DESTROY);
-
-    Lock *doomed = Lock::Lookup(addr);
-    if (doomed == NULL) {
-      // The lock is not known: report an error and do nothing else.
-      ThreadSanitizerInvalidLockReport *report =
-          new ThreadSanitizerInvalidLockReport;
-      report->type = ThreadSanitizerReport::INVALID_LOCK;
-      report->tid = TID(e->tid());
-      report->lock_addr = addr;
-      report->stack_trace = caller->CreateStackTrace();
-      ThreadSanitizerPrintReport(report);
-      return;
-    }
-
-    // A locked pthread_mutex_t can not be destroyed but other lock types can,
-    // so a lock that is still held is unlocked before it is destroyed.
-    // If a buggy program later unlocks the destroyed lock, we are likely to
-    // fail in an assert.
-    //
-    // No unlock-on-destroy after main() has exited: global Mutex objects may
-    // be destructed while threads holding them are still running.
-    const bool still_held = doomed->wr_held() || doomed->rd_held();
-    if (still_held && G_flags->unlock_on_mutex_destroy && !g_has_exited_main) {
-      caller->HandleUnlock(addr);
-    }
-    caller->HandleForgetSignaller(addr);
-    Lock::Destroy(addr);
-  }
-
-  // TRACE_MEM
-  // Sets the 'traced' bit for the address e->a() in its cache line.
-  // Does nothing if the trace_level flag is zero.
-  void HandleTraceMem(Event *e) {
-    if (G_flags->trace_level == 0) return;
-    TID tid(e->tid());
-    // Use the TID built above rather than constructing a second one.
-    TSanThread *thr = TSanThread::Get(tid);
-    uintptr_t a = e->a();
-    // The line is obtained (created if missing), updated and released again.
-    CacheLine *line = G_cache->GetLineOrCreateNew(thr, a, __LINE__);
-    uintptr_t offset = CacheLine::ComputeOffset(a);
-    line->traced().Set(offset);
-    G_cache->ReleaseLine(thr, a, line, __LINE__);
-    if (G_flags->verbosity >= 2) e->Print();
-  }
-
-  // For each (new, old) pair that differs: references the new segment set and
-  // unreferences the old one, skipping empty sets. Both Ref() calls are made
-  // before any Unref() call.
-  INLINE void RefAndUnrefTwoSegSetPairsIfDifferent(SSID new_ssid1,
-                                                   SSID old_ssid1,
-                                                   SSID new_ssid2,
-                                                   SSID old_ssid2) {
-    const bool pair1_changed = new_ssid1 != old_ssid1;
-    const bool pair2_changed = new_ssid2 != old_ssid2;
-
-    // Acquire the new references.
-    if (pair1_changed && !new_ssid1.IsEmpty())
-      SegmentSet::Ref(new_ssid1, "RefAndUnrefTwoSegSetPairsIfDifferent");
-    if (pair2_changed && !new_ssid2.IsEmpty())
-      SegmentSet::Ref(new_ssid2, "RefAndUnrefTwoSegSetPairsIfDifferent");
-
-    // Drop the old references.
-    if (pair1_changed && !old_ssid1.IsEmpty())
-      SegmentSet::Unref(old_ssid1, "RefAndUnrefTwoSegSetPairsIfDifferent");
-    if (pair2_changed && !old_ssid2.IsEmpty())
-      SegmentSet::Unref(old_ssid2, "RefAndUnrefTwoSegSetPairsIfDifferent");
-  }
-
-
-  // Returns true if the current pair of read/write segment sets
-  // describes a race, i.e. some checked pair of accesses shares no lock.
-  bool NOINLINE CheckIfRace(SSID rd_ssid, SSID wr_ssid) {
-    int wr_ss_size = SegmentSet::Size(wr_ssid);
-    int rd_ss_size = SegmentSet::Size(rd_ssid);
-
-    DCHECK(wr_ss_size >= 2 || (wr_ss_size >= 1 && rd_ss_size >= 1));
-
-    // Check all write-write pairs (each unordered pair once: w2 > w1).
-    for (int w1 = 0; w1 < wr_ss_size; w1++) {
-      SID w1_sid = SegmentSet::GetSID(wr_ssid, w1, __LINE__);
-      Segment *w1_seg = Segment::Get(w1_sid);
-      LSID w1_ls = w1_seg->lsid(true);  // lsid(true) for writers, by usage.
-      for (int w2 = w1 + 1; w2 < wr_ss_size; w2++) {
-        DCHECK(wr_ssid.IsTuple());  // Reached only with two or more writers.
-        SegmentSet *ss = SegmentSet::Get(wr_ssid);
-        LSID w2_ls = Segment::Get(ss->GetSID(w2))->lsid(true);
-        if (LockSet::IntersectionIsEmpty(w1_ls, w2_ls)) {
-          return true;  // No common lock between the two writers.
-        } else {
-          // May happen only if the locks in the intersection are hybrid locks.
-          DCHECK(LockSet::HasNonPhbLocks(w1_ls) &&
-                 LockSet::HasNonPhbLocks(w2_ls));
-        }
-      }
-      // Check all write-read pairs for this writer.
-      for (int r = 0; r < rd_ss_size; r++) {
-        SID r_sid = SegmentSet::GetSID(rd_ssid, r, __LINE__);
-        Segment *r_seg = Segment::Get(r_sid);
-        LSID r_ls = r_seg->lsid(false);  // lsid(false) for readers, by usage.
-        if (Segment::HappensBeforeOrSameThread(w1_sid, r_sid))
-          continue;  // Ordered or same thread: this pair can not race.
-        if (LockSet::IntersectionIsEmpty(w1_ls, r_ls)) {
-          return true;  // No common lock between the writer and the reader.
-        } else {
-          // May happen only if the locks in the intersection are hybrid locks.
-          DCHECK(LockSet::HasNonPhbLocks(w1_ls) &&
-                 LockSet::HasNonPhbLocks(r_ls));
-        }
-      }
-    }
-    return false;
-  }
-
-  // New experimental state machine: computes the shadow value after an access.
-  // Sets *res to the new state.
-  // Returns true if the new state is a race.
-  bool INLINE MemoryStateMachine(ShadowValue old_sval, TSanThread *thr,
-                                 bool is_w, ShadowValue *res) {
-    ShadowValue new_sval;
-    SID cur_sid = thr->sid();
-    DCHECK(cur_sid.valid());
-
-    if (UNLIKELY(old_sval.IsNew())) {
-      // We see this memory for the first time: the state is just cur_sid.
-      DCHECK(cur_sid.valid());
-      if (is_w) {
-        new_sval.set(SSID(0), SSID(cur_sid));  // set(rd_ssid, wr_ssid)
-      } else {
-        new_sval.set(SSID(cur_sid), SSID(0));
-      }
-      *res = new_sval;
-      return false;
-    }
-
-    SSID old_rd_ssid = old_sval.rd_ssid();
-    SSID old_wr_ssid = old_sval.wr_ssid();
-    SSID new_rd_ssid(0);
-    SSID new_wr_ssid(0);
-    if (is_w) {  // Write: cur_sid leaves the readers and joins the writers.
-      new_rd_ssid = SegmentSet::RemoveSegmentFromSS(old_rd_ssid, cur_sid);
-      new_wr_ssid = SegmentSet::AddSegmentToSS(old_wr_ssid, cur_sid);
-    } else {  // Read: the writers are left unchanged.
-      if (SegmentSet::Contains(old_wr_ssid, cur_sid)) {
-        // cur_sid is already in old_wr_ssid, no change to SSrd is required.
-        new_rd_ssid = old_rd_ssid;
-      } else {
-        new_rd_ssid = SegmentSet::AddSegmentToSS(old_rd_ssid, cur_sid);
-      }
-      new_wr_ssid = old_wr_ssid;
-    }
-
-    if (UNLIKELY(G_flags->sample_events > 0)) {
-      if (new_rd_ssid.IsTuple() || new_wr_ssid.IsTuple()) {
-        static EventSampler sampler;
-        sampler.Sample(thr, "HasTupleSS", false);
-      }
-    }
-
-
-    new_sval.set(new_rd_ssid, new_wr_ssid);
-    *res = new_sval;
-    if (new_sval == old_sval)
-      return false;  // Nothing changed, so no race check is done.
-
-    if (new_wr_ssid.IsTuple() ||
-        (!new_wr_ssid.IsEmpty() && !new_rd_ssid.IsEmpty())) {
-      return CheckIfRace(new_rd_ssid, new_wr_ssid);
-    }
-    return false;
-  }
-
-
-  // Fast path implementation for the case when we stay in the same thread.
-  // In this case we don't need to call HappensBefore(), deal with
-  // Tuple segment sets and check for race.
-  // Returns true after updating *new_sval in the same way as
-  // MemoryStateMachine() would, just faster; false if no case below applies.
-  INLINE bool MemoryStateMachineSameThread(bool is_w, ShadowValue old_sval,
-                                           TSanThread *thr,
-                                           ShadowValue *new_sval) {
-#define MSM_STAT(i) do { if (DEBUG_MODE) \
-  thr->stats.msm_branch_count[i]++; } while ((void)0, 0)
-    SSID rd_ssid = old_sval.rd_ssid();
-    SSID wr_ssid = old_sval.wr_ssid();
-    SID cur_sid = thr->sid();
-    TID tid = thr->tid();
-    if (rd_ssid.IsEmpty()) {
-      if (wr_ssid.IsSingleton()) {
-        // *** CASE 01 ***: rd_ssid == 0, wr_ssid == singleton
-        SID wr_sid = wr_ssid.GetSingleton();
-        if (wr_sid == cur_sid) {  // --- w/r: {0, cur} => {0, cur}
-          MSM_STAT(1);
-          // no op
-          return true;
-        }
-        if (tid == Segment::Get(wr_sid)->tid()) {
-          // same thread, but the segments are different.
-          DCHECK(cur_sid != wr_sid);
-          if (is_w) {    // -------------- w: {0, wr} => {0, cur}
-            MSM_STAT(2);
-            new_sval->set(SSID(0), SSID(cur_sid));
-            thr->AddDeadSid(wr_sid, "FastPath01");
-          } else {       // -------------- r: {0, wr} => {cur, wr}
-            MSM_STAT(3);
-            new_sval->set(SSID(cur_sid), wr_ssid);
-          }
-          Segment::Ref(cur_sid, "FastPath01");  // cur_sid is newly stored.
-          return true;
-        }
-      } else if (wr_ssid.IsEmpty()) {
-        // *** CASE 00 ***: rd_ssid == 0, wr_ssid == 0
-        if (is_w) {      // -------------- w: {0, 0} => {0, cur}
-          MSM_STAT(4);
-          new_sval->set(SSID(0), SSID(cur_sid));
-        } else {         // -------------- r: {0, 0} => {cur, 0}
-          MSM_STAT(5);
-          new_sval->set(SSID(cur_sid), SSID(0));
-        }
-        Segment::Ref(cur_sid, "FastPath00");
-        return true;
-      }
-    } else if (rd_ssid.IsSingleton()) {
-      SID rd_sid = rd_ssid.GetSingleton();
-      if (wr_ssid.IsEmpty()) {
-        // *** CASE 10 ***: rd_ssid == singleton, wr_ssid == 0
-        if (rd_sid == cur_sid) {
-          // same segment.
-          if (is_w) {    // -------------- w: {cur, 0} => {0, cur}
-            MSM_STAT(6);
-            new_sval->set(SSID(0), SSID(cur_sid));
-          } else {       // -------------- r: {cur, 0} => {cur, 0}
-            MSM_STAT(7);
-            // no op
-          }
-          return true;
-        }
-        if (tid == Segment::Get(rd_sid)->tid()) {
-          // same thread, but the segments are different.
-          DCHECK(cur_sid != rd_sid);
-          if (is_w) {  // -------------- w: {rd, 0} => {0, cur}
-            MSM_STAT(8);
-            new_sval->set(SSID(0), SSID(cur_sid));
-          } else {     // -------------- r: {rd, 0} => {cur, 0}
-            MSM_STAT(9);
-            new_sval->set(SSID(cur_sid), SSID(0));
-          }
-          Segment::Ref(cur_sid, "FastPath10");
-          thr->AddDeadSid(rd_sid, "FastPath10");  // rd_sid is replaced.
-          return true;
-        }
-      } else if (wr_ssid.IsSingleton()){
-        // *** CASE 11 ***: rd_ssid == singleton, wr_ssid == singleton
-        DCHECK(rd_ssid.IsSingleton());
-        SID wr_sid = wr_ssid.GetSingleton();
-        DCHECK(wr_sid != rd_sid);  // By definition of ShadowValue.
-        if (cur_sid == rd_sid) {
-          if (tid == Segment::Get(wr_sid)->tid()) {
-            if (is_w) {  // -------------- w: {cur, wr} => {0, cur}
-              MSM_STAT(10);
-              new_sval->set(SSID(0), SSID(cur_sid));
-              thr->AddDeadSid(wr_sid, "FastPath11");
-            } else {     // -------------- r: {cur, wr} => {cur, wr}
-              MSM_STAT(11);
-              // no op
-            }
-            return true;
-          }
-        } else if (cur_sid == wr_sid){
-          if (tid == Segment::Get(rd_sid)->tid()) {
-            if (is_w) {  // -------------- w: {rd, cur} => {rd, cur}
-              MSM_STAT(12);
-              // no op
-            } else {     // -------------- r: {rd, cur} => {0, cur}
-              MSM_STAT(13);
-              new_sval->set(SSID(0), SSID(cur_sid));
-              thr->AddDeadSid(rd_sid, "FastPath11");
-            }
-            return true;
-          }
-        } else if (tid == Segment::Get(rd_sid)->tid() &&
-                   tid == Segment::Get(wr_sid)->tid()) {
-          if (is_w) {    // -------------- w: {rd, wr} => {0, cur}
-            MSM_STAT(14);
-            new_sval->set(SSID(0), SSID(cur_sid));
-            thr->AddDeadSid(wr_sid, "FastPath11");
-          } else {       // -------------- r: {rd, wr} => {cur, wr}
-            MSM_STAT(15);
-            new_sval->set(SSID(cur_sid), wr_ssid);
-          }
-          thr->AddDeadSid(rd_sid, "FastPath11");
-          Segment::Ref(cur_sid, "FastPath11");
-          return true;
-        }
-      }
-    }
-    MSM_STAT(0);  // No fast-path case applied.
-    return false;
-#undef MSM_STAT
-  }
-
-  // Returns false if we were not able to complete the task (fast_path_only).
-  INLINE bool HandleMemoryAccessHelper(bool is_w,
-                                       CacheLine *cache_line,
-                                       uintptr_t addr,
-                                       uintptr_t size,
-                                       uintptr_t pc,
-                                       TSanThread *thr,
-                                       bool fast_path_only) {
-    DCHECK((addr & (size - 1)) == 0);  // size-aligned.
-    uintptr_t offset = CacheLine::ComputeOffset(addr);
-
-    ShadowValue old_sval;
-    ShadowValue *sval_p = NULL;
-
-    if (UNLIKELY(!cache_line->has_shadow_value().Get(offset))) {
-      sval_p = cache_line->AddNewSvalAtOffset(offset);  // No value here yet.
-      DCHECK(sval_p->IsNew());
-    } else {
-      sval_p = cache_line->GetValuePointer(offset);
-    }
-    old_sval = *sval_p;  // Copy taken before sval_p is updated in place.
-
-    bool res = false;
-    bool fast_path_ok = MemoryStateMachineSameThread(
-        is_w, old_sval, thr, sval_p);
-    if (fast_path_ok) {
-      res = true;
-    } else if (fast_path_only) {
-      res = false;  // The slow path is not allowed; the caller must retry.
-    } else {
-      bool is_published = cache_line->published().Get(offset);
-      // We check only the first bit for publishing, oh well.
-      if (UNLIKELY(is_published)) {
-        const VTS *signaller_vts = GetPublisherVTS(addr);
-        CHECK(signaller_vts);
-        thr->NewSegmentForWait(signaller_vts);
-      }
-
-      bool is_race = MemoryStateMachine(old_sval, thr, is_w, sval_p);
-
-      // Check for race.
-      if (UNLIKELY(is_race)) {
-        if (thr->ShouldReportRaces()) {
-          if (G_flags->report_races && !cache_line->racey().Get(offset)) {
-            reports_.AddReport(thr, pc, is_w, addr, size,
-                               old_sval, *sval_p, is_published);
-          }
-          cache_line->racey().SetRange(offset, offset + size);
-        }
-      }
-
-      // Ref/Unref segments that entered or left the shadow value.
-      RefAndUnrefTwoSegSetPairsIfDifferent(sval_p->rd_ssid(),
-                                           old_sval.rd_ssid(),
-                                           sval_p->wr_ssid(),
-                                           old_sval.wr_ssid());
-      res = true;
-    }
-
-
-    if (DEBUG_MODE && !fast_path_only) {
-      // Check that the SSIDs/SIDs in the new sval have sane ref counters.
-      CHECK(!sval_p->wr_ssid().IsEmpty() || !sval_p->rd_ssid().IsEmpty());
-      for (int i = 0; i < 2; i++) {  // i == 0: writers, i == 1: readers.
-        SSID ssid = i ? sval_p->rd_ssid() : sval_p->wr_ssid();
-        if (ssid.IsEmpty()) continue;
-        if (ssid.IsSingleton()) {
-          // singleton segment should have ref count > 0.
-          SID sid = ssid.GetSingleton();
-          Segment *seg = Segment::Get(sid);
-          (void)seg;
-          CHECK(seg->ref_count() > 0);
-          if (sid == thr->sid()) {
-            // if this is the current seg, ref count should be > 1.
-            CHECK(seg->ref_count() > 1);
-          }
-        } else {
-          SegmentSet *sset = SegmentSet::Get(ssid);
-          (void)sset;
-          CHECK(sset->ref_count() > 0);
-        }
-      }
-    }
-    return res;
-  }
-
-
-  // Dispatches one memory access to HandleMemoryAccessHelper at the
-  // granularity recorded in the cache line for the access' offset.
-  //  - Size 1/2/4/8, suitably aligned, granularity already matching:
-  //    a single helper call (label one_call).
-  //  - Size 1/2/4/8, suitably aligned, granularity different: the line is
-  //    split/joined first and the access is replayed piecewise (label
-  //    slow_path).
-  //  - Anything else: replayed byte by byte, within the current cache line only.
-  // return false if we were not able to complete the task (fast_path_only).
-  INLINE bool HandleAccessGranularityAndExecuteHelper(
-      CacheLine *cache_line,
-      TSanThread *thr, uintptr_t addr, MopInfo *mop,
-      bool has_expensive_flags, bool fast_path_only) {
-    size_t size = mop->size();
-    uintptr_t pc = mop->pc();
-    bool is_w = mop->is_write();
-    uintptr_t a = addr;
-    uintptr_t b = 0;
-    uintptr_t off = CacheLine::ComputeOffset(a);
-
-    uint16_t *granularity_mask = cache_line->granularity_mask(off);
-    uint16_t gr = *granularity_mask;
-
-    // Can't do split/join on the fast path, because it involves segment set
-    // reference count manipulation that is not thread-safe.
-
-    // In each aligned case below an empty mask (gr == 0) is first initialized
-    // to the pattern shown in the trailing comment.
-    if        (size == 8 && (off & 7) == 0) {
-      if (!gr) {
-        *granularity_mask = gr = 1;  // 0000000000000001
-      }
-      if (GranularityIs8(off, gr)) {
-        if (has_expensive_flags) thr->stats.n_fast_access8++;
-        cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
-        goto one_call;
-      } else {
-        if (fast_path_only) return false;
-        if (has_expensive_flags) thr->stats.n_slow_access8++;
-        cache_line->Join_1_to_2(off);
-        cache_line->Join_1_to_2(off + 2);
-        cache_line->Join_1_to_2(off + 4);
-        cache_line->Join_1_to_2(off + 6);
-        cache_line->Join_2_to_4(off);
-        cache_line->Join_2_to_4(off + 4);
-        cache_line->Join_4_to_8(off);
-        goto slow_path;
-      }
-    } else if (size == 4 && (off & 3) == 0) {
-      if (!gr) {
-        *granularity_mask = gr = 3 << 1;  // 0000000000000110
-      }
-      if (GranularityIs4(off, gr)) {
-        if (has_expensive_flags) thr->stats.n_fast_access4++;
-        cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
-        goto one_call;
-      } else {
-        if (fast_path_only) return false;
-        if (has_expensive_flags) thr->stats.n_slow_access4++;
-        cache_line->Split_8_to_4(off);
-        cache_line->Join_1_to_2(off);
-        cache_line->Join_1_to_2(off + 2);
-        cache_line->Join_2_to_4(off);
-        goto slow_path;
-      }
-    } else if (size == 2 && (off & 1) == 0) {
-      if (!gr) {
-        *granularity_mask = gr = 15 << 3;  // 0000000001111000
-      }
-      if (GranularityIs2(off, gr)) {
-        if (has_expensive_flags) thr->stats.n_fast_access2++;
-        cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
-        goto one_call;
-      } else {
-        if (fast_path_only) return false;
-        if (has_expensive_flags) thr->stats.n_slow_access2++;
-        cache_line->Split_8_to_4(off);
-        cache_line->Split_4_to_2(off);
-        cache_line->Join_1_to_2(off);
-        goto slow_path;
-      }
-    } else if (size == 1) {
-      if (!gr) {
-        *granularity_mask = gr = 255 << 7;  // 0111111110000000
-      }
-      if (GranularityIs1(off, gr)) {
-        if (has_expensive_flags) thr->stats.n_fast_access1++;
-        cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
-        goto one_call;
-      } else {
-        if (fast_path_only) return false;
-        if (has_expensive_flags) thr->stats.n_slow_access1++;
-        cache_line->Split_8_to_4(off);
-        cache_line->Split_4_to_2(off);
-        cache_line->Split_2_to_1(off);
-        goto slow_path;
-      }
-    } else {
-      if (fast_path_only) return false;
-      if (has_expensive_flags) thr->stats.n_very_slow_access++;
-      // Very slow: size is not 1,2,4,8 or address is unaligned.
-      // Handle this access as a series of 1-byte accesses, but only
-      // inside the current cache line.
-      // TODO(kcc): do we want to handle the next cache line as well?
-      b = a + mop->size();
-      uintptr_t max_x = min(b, CacheLine::ComputeNextTag(a));
-      for (uintptr_t x = a; x < max_x; x++) {
-        off = CacheLine::ComputeOffset(x);
-        DCHECK(CacheLine::ComputeTag(x) == cache_line->tag());
-        // Note: this declaration shadows the outer granularity_mask; it is
-        // the mask for the offset of byte x.
-        uint16_t *granularity_mask = cache_line->granularity_mask(off);
-        if (!*granularity_mask) {
-          *granularity_mask = 1;
-        }
-        cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
-        cache_line->Split_8_to_4(off);
-        cache_line->Split_4_to_2(off);
-        cache_line->Split_2_to_1(off);
-        if (!HandleMemoryAccessHelper(is_w, cache_line, x, 1, pc, thr, false))
-          return false;
-      }
-      return true;
-    }
-
-slow_path:
-    // Only reached through 'goto slow_path', and every such jump is preceded
-    // by the same fast_path_only check, so this one is a defensive repeat.
-    if (fast_path_only) return false;
-    DCHECK(cache_line);
-    DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
-    DCHECK((addr & (size - 1)) == 0);  // size-aligned.
-    // Re-read the mask (presumably the Split/Join calls above updated it).
-    gr = *granularity_mask;
-    CHECK(gr);
-    // size is one of 1, 2, 4, 8; address is size-aligned, but the granularity
-    // is different.
-    b = a + mop->size();
-    for (uintptr_t x = a; x < b;) {
-      if (has_expensive_flags) thr->stats.n_access_slow_iter++;
-      off = CacheLine::ComputeOffset(x);
-      cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
-      size_t s = 0;
-      // How many bytes are we going to access?
-      if     (GranularityIs8(off, gr)) s = 8;
-      else if(GranularityIs4(off, gr)) s = 4;
-      else if(GranularityIs2(off, gr)) s = 2;
-      else                             s = 1;
-      if (!HandleMemoryAccessHelper(is_w, cache_line, x, s, pc, thr, false))
-        return false;
-      x += s;
-    }
-    return true;
-one_call:
-    // Granularity matches the access: handle it in one helper call, passing
-    // fast_path_only through.
-    return HandleMemoryAccessHelper(is_w, cache_line, addr, size, pc,
-                                    thr, fast_path_only);
-  }
-
-  // Tells whether the access at 'addr' must be traced: tracing has to be
-  // possible at all (expensive flags on and a non-zero trace_level), and then
-  // either the line's traced bit for this offset is set or 'addr' is the
-  // address given by the trace_addr flag.
-  INLINE bool IsTraced(CacheLine *cache_line, uintptr_t addr,
-                       bool has_expensive_flags) {
-    const bool tracing_possible =
-        has_expensive_flags && G_flags->trace_level != 0;
-    if (!tracing_possible) return false;
-    DCHECK(cache_line);
-    const uintptr_t offset = CacheLine::ComputeOffset(addr);
-    if (cache_line->traced().Get(offset)) return true;
-    return addr == G_flags->trace_addr;
-  }
-
-  // Prints a "TRACE:" line followed by the current stack trace for every byte
-  // of the access [addr, addr + mop->size()) that already has a shadow value.
-  // A TIL on ts_lock is held for the whole loop; need_locking is passed to it.
-  void DoTrace(TSanThread *thr, uintptr_t addr, MopInfo *mop, bool need_locking) {
-    size_t size = mop->size();
-    uintptr_t pc = mop->pc();
-    TIL til(ts_lock, 1, need_locking);
-    const uintptr_t end = addr + size;
-    for (uintptr_t byte = addr; byte < end; ++byte) {
-      uintptr_t offset = CacheLine::ComputeOffset(byte);
-      CacheLine *line = G_cache->GetLineOrCreateNew(thr, byte, __LINE__);
-      ShadowValue *shadow = line->GetValuePointer(offset);
-      const bool has_shadow = line->has_shadow_value().Get(offset) != 0;
-      if (has_shadow) {
-        bool is_published = line->published().Get(offset);
-        // The ToString() temporaries must stay inside the call expression so
-        // that their c_str() pointers remain valid while Printf runs.
-        Printf("TRACE: T%d/S%d %s[%d] addr=%p sval: %s%s; line=%p P=%s\n",
-               raw_tid(thr), thr->sid().raw(), mop->is_write() ? "wr" : "rd",
-               size, addr, shadow->ToString().c_str(),
-               is_published ? " P" : "",
-               line,
-               line->published().Empty() ?
-               "0" : line->published().ToString().c_str());
-        thr->ReportStackTrace(pc);
-      }
-      // The line is released whether or not anything was printed.
-      G_cache->ReleaseLine(thr, byte, line, __LINE__);
-    }
-  }
-
-
-  // Slow path of a memory access; the caller must already hold the TIL.
-  // Flushes the thread's dead SIDs (and, in the non-serialized build, grabs
-  // fresh ones), runs the access with fast_path_only == false on its cache
-  // line, and finally does the optional tracing and event sampling.
-#if TS_SERIALIZED == 1
-  INLINE  // TODO(kcc): this can also be made NOINLINE later.
-#else
-  NOINLINE
-#endif
-  void HandleMemoryAccessSlowLocked(TSanThread *thr,
-                                    uintptr_t addr,
-                                    MopInfo *mop,
-                                    bool has_expensive_flags,
-                                    bool need_locking) {
-    AssertTILHeld();
-    DCHECK(thr->lsid(false) == thr->segment()->lsid(false));
-    DCHECK(thr->lsid(true) == thr->segment()->lsid(true));
-    thr->FlushDeadSids();
-    // In the serialized version this function is the hotspot, so fresh SIDs
-    // are grabbed only in the non-serial variant.
-    if (!TS_SERIALIZED) {
-      thr->GetSomeFreshSids();
-    }
-
-    CacheLine *line = G_cache->GetLineOrCreateNew(thr, addr, __LINE__);
-    HandleAccessGranularityAndExecuteHelper(line, thr, addr,
-                                            mop, has_expensive_flags,
-                                            /*fast_path_only=*/false);
-    // Decide about tracing while the line is still held.
-    const bool tracing = IsTraced(line, addr, has_expensive_flags);
-    G_cache->ReleaseLine(thr, addr, line, __LINE__);
-    line = NULL;  // just in case.
-
-    if (!has_expensive_flags) return;
-
-    if (tracing) {
-      DoTrace(thr, addr, mop, /*need_locking=*/false);
-    }
-    if (G_flags->sample_events > 0) {
-      const char *event_name = "SampleMemoryAccess";
-      static EventSampler sampler;
-      sampler.Sample(thr, event_name, false);
-    }
-  }
-
-  // Handles a single memory access of thread 'thr'.
-  // With need_locking set, a lock-free fast path is tried first on a cache
-  // line acquired via TryAcquireLine; if that cannot finish the job (or
-  // need_locking is false) the access is handled under ts_lock by
-  // HandleMemoryAccessSlowLocked.
-  // *sblock_pc is passed to HandleSblockEnter and reset to 0 once the
-  // super-block entry has been handled.
-  // Returns true if the locked slow path was taken; false if the access was
-  // completed on the fast path or routed to the atomicity-violation detector.
-  INLINE bool HandleMemoryAccessInternal(TSanThread *thr,
-                                         uintptr_t *sblock_pc,
-                                         uintptr_t addr,
-                                         MopInfo *mop,
-                                         bool has_expensive_flags,
-                                         bool need_locking) {
-    // INC_STAT bumps a counter only when expensive flags are enabled.
-#   define INC_STAT(stat) \
-        do { if (has_expensive_flags) (stat)++; } while ((void)0, 0)
-    if (TS_ATOMICITY && G_flags->atomicity) {
-      HandleMemoryAccessForAtomicityViolationDetector(thr, addr, mop);
-      return false;
-    }
-    DCHECK(mop->size() > 0);
-    DCHECK(thr->is_running());
-    DCHECK(!thr->ignore_reads() || !thr->ignore_writes());
-
-    // We do not check and ignore stack now.
-    // On unoptimized binaries this would give ~10% speedup if ignore_stack==true,
-    // but if --ignore_stack==false this would cost few extra insns.
-    // On optimized binaries ignoring stack gives nearly nothing.
-    // if (thr->IgnoreMemoryIfInStack(addr)) return;
-
-    CacheLine *cache_line = NULL;
-    // Sizes above 16 all share bucket 17 of memory_access_sizes.
-    INC_STAT(thr->stats.memory_access_sizes[mop->size() <= 16 ? mop->size() : 17 ]);
-    INC_STAT(thr->stats.events[mop->is_write() ? WRITE : READ]);
-    if (has_expensive_flags) {
-      thr->stats.access_to_first_1g += (addr >> 30) == 0;
-      thr->stats.access_to_first_2g += (addr >> 31) == 0;
-      thr->stats.access_to_first_4g += ((uint64_t)addr >> 32) == 0;
-    }
-
-    // Records why the fast path was not sufficient (index into
-    // stats.locked_access):
-    //   1 - the fast-path helper returned false,
-    //   2 - HandleSblockEnter refused without the slow path,
-    //   3 - the acquired line has a different tag,
-    //   4 - the acquired cache slot is empty (NULL),
-    //   5 - LineIsNullOrLocked() held for a non-NULL line (presumably locked),
-    //   6 - the thread has no room for dead SIDs,
-    //   7 - need_locking is false.
-    int locked_access_case = 0;
-
-    if (need_locking) {
-      // The fast (unlocked) path.
-      if (thr->HasRoomForDeadSids()) {
-        // Acquire a line w/o locks.
-        cache_line = G_cache->TryAcquireLine(thr, addr, __LINE__);
-        if (!Cache::LineIsNullOrLocked(cache_line)) {
-          // The line is not empty or locked -- check the tag.
-          if (cache_line->tag() == CacheLine::ComputeTag(addr)) {
-            // The line is ours and non-empty -- fire the fast path.
-            if (thr->HandleSblockEnter(*sblock_pc, /*allow_slow_path=*/false)) {
-              *sblock_pc = 0;  // don't do SblockEnter any more.
-              bool res = HandleAccessGranularityAndExecuteHelper(
-                  cache_line, thr, addr,
-                  mop, has_expensive_flags,
-                  /*fast_path_only=*/true);
-              // Query the traced state before the line is released.
-              bool traced = IsTraced(cache_line, addr, has_expensive_flags);
-              // release the line.
-              G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
-              if (res && has_expensive_flags && traced) {
-                DoTrace(thr, addr, mop, /*need_locking=*/true);
-              }
-              if (res) {
-                INC_STAT(thr->stats.unlocked_access_ok);
-                // fast path succeeded, we are done.
-                return false;
-              } else {
-                locked_access_case = 1;
-              }
-            } else {
-              // we were not able to handle SblockEnter.
-              G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
-              locked_access_case = 2;
-            }
-          } else {
-            locked_access_case = 3;
-            // The line has a wrong tag.
-            G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
-          }
-        } else if (cache_line == NULL) {
-          locked_access_case = 4;
-          // We grabbed the cache slot but it is empty, release it.
-          G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
-        } else {
-          locked_access_case = 5;
-        }
-      } else {
-        locked_access_case = 6;
-      }
-    } else {
-      locked_access_case = 7;
-    }
-
-    if (need_locking) {
-      INC_STAT(thr->stats.locked_access[locked_access_case]);
-    }
-
-    // Everything below goes under a lock.
-    TIL til(ts_lock, 2, need_locking);
-    thr->HandleSblockEnter(*sblock_pc, /*allow_slow_path=*/true);
-    *sblock_pc = 0;  // don't do SblockEnter any more.
-    HandleMemoryAccessSlowLocked(thr, addr, mop,
-                                 has_expensive_flags,
-                                 need_locking);
-    return true;
-#undef INC_STAT
-  }
-
-
-  // Memory-access handler used when the atomicity flag is on.
-  // Accesses to the thread's own stack are ignored. If both of the thread's
-  // lock-set ids are raw 0 (presumably: no locks held) only the
-  // "mops since start" counter is bumped. Otherwise the accessed range
-  // [addr, addr + mop->size()) is added to the thread's per-lock-era access
-  // set for reads or writes, as selected by mop->is_write().
-  void HandleMemoryAccessForAtomicityViolationDetector(TSanThread *thr,
-                                                       uintptr_t addr,
-                                                       MopInfo *mop) {
-    CHECK(G_flags->atomicity);
-    if (thr->MemoryIsInStack(addr)) return;
-
-    LSID wr_lsid = thr->lsid(0);
-    LSID rd_lsid = thr->lsid(1);
-    if (wr_lsid.raw() == 0 && rd_lsid.raw() == 0) {
-      thr->increment_n_mops_since_start();
-      return;
-    }
-
-    BitSet *range_set = thr->lock_era_access_set(mop->is_write());
-    range_set->Add(addr, addr + mop->size());
-  }
-
-
-  // MALLOC
-  // Handles an allocation event (e->a() is the address, e->info() the size):
-  // starts a new segment for the thread, clears the memory state of the
-  // range, and records the block in G_heap_map. For mmap events the range is
-  // recorded in G_thread_stack_map as well.
-  void HandleMalloc(Event *e, bool is_mmap) {
-    ScopedMallocCostCenter cc("HandleMalloc");
-    TID tid(e->tid());
-    uintptr_t begin = e->a();
-    uintptr_t n_bytes = e->info();
-
-    if (begin == 0) {
-      return;
-    }
-
-    #if defined(__GNUC__) && __WORDSIZE == 64
-    // Huge allocations are not handled at all because that is too slow.
-    // TODO(kcc): this is a workaround for NaCl. May need to fix it cleaner.
-    const uint64_t G84 = (1ULL << 32) * 21; // 84G.
-    if (n_bytes >= G84) {
-      return;
-    }
-    #endif
-
-    TSanThread *thr = TSanThread::Get(tid);
-    thr->NewSegmentForMallocEvent();
-    uintptr_t end = begin + n_bytes;
-    CHECK(begin <= end);
-    ClearMemoryState(thr, begin, end);
-
-    // Describe the block for the heap map; the map entry keeps a reference
-    // to the allocating segment.
-    HeapInfo info;
-    info.ptr  = begin;
-    info.size = n_bytes;
-    info.sid  = thr->sid();
-    Segment::Ref(info.sid, __FUNCTION__);
-    if (debug_malloc) {
-      Printf("T%d MALLOC: %p [%p %p) %s %s\n%s\n",
-             tid.raw(), n_bytes, begin, begin + n_bytes,
-             Segment::ToString(thr->sid()).c_str(),
-             thr->segment()->vts()->ToString().c_str(),
-             info.StackTraceString().c_str());
-    }
-
-    // No CHECK(!G_heap_map->count(begin)) here: we may have two calls
-    // to AnnotateNewMemory.
-    G_heap_map->InsertInfo(begin, info);
-
-    if (!is_mmap) {
-      return;
-    }
-    // Mmap may be used for thread stack, so we should keep the mmap info
-    // when state is flushing.
-    ThreadStackInfo stack_info;
-    stack_info.ptr = begin;
-    stack_info.size = n_bytes;
-    G_thread_stack_map->InsertInfo(begin, stack_info);
-  }
-
-  // Models the deletion of [a, a + size) as a sequence of writes, but only
-  // over at most the first kMaxWriteSizeOnFree (2048) bytes so that huge
-  // blocks stay cheap. Does nothing for an empty block, when the
-  // free_is_write flag is off, or when global_ignore is set.
-  void ImitateWriteOnFree(TSanThread *thr, uintptr_t a, uintptr_t size, uintptr_t pc) {
-    if (size == 0 || !G_flags->free_is_write || global_ignore) return;
-
-    const uintptr_t kMaxWriteSizeOnFree = 2048;
-    const uintptr_t limit = min(kMaxWriteSizeOnFree, size);
-    // Word-sized (4- or 8-byte) accesses are simulated to make analysis
-    // faster; only the last chunk may be shorter.
-    const uintptr_t word = sizeof(uintptr_t);
-    uintptr_t done = 0;
-    while (done < limit) {
-      const uintptr_t left = limit - done;
-      const uintptr_t chunk = left >= word ? word : left;
-      HandleMemoryAccess(thr, pc, a + done, chunk,
-                         /*is_w=*/true, /*need_locking*/false);
-      done += word;
-    }
-  }
-
-  // FREE
-  // Handles a deallocation event for the block starting at e->a(). Events
-  // for NULL or for addresses that are not the start of a block known to
-  // G_heap_map are ignored. Otherwise the free is imitated as a write,
-  // the block's segment reference and heap-map entry are dropped, its memory
-  // state is cleared, and the write is imitated once more.
-  void HandleFree(Event *e) {
-    TID tid(e->tid());
-    TSanThread *thr = TSanThread::Get(tid);
-    uintptr_t block = e->a();
-    if (debug_free) {
-      e->Print();
-      thr->ReportStackTrace(e->pc());
-    }
-    if (block == 0) {
-      return;
-    }
-    HeapInfo *heap_info = G_heap_map->GetInfo(block);
-    const bool is_block_start = heap_info && heap_info->ptr == block;
-    if (!is_block_start) {
-      return;
-    }
-    // Copy the size out: the map entry is erased below.
-    uintptr_t block_size = heap_info->size;
-    uintptr_t free_pc = e->pc();
-    ImitateWriteOnFree(thr, block, block_size, free_pc);
-
-    // update G_heap_map
-    CHECK(heap_info->ptr == block);
-    Segment::Unref(heap_info->sid, __FUNCTION__);
-    ClearMemoryState(thr, block, block + block_size);
-    G_heap_map->EraseInfo(block);
-
-    // We imitate a Write event again, in case there will be use-after-free.
-    // We also need to create a new sblock so that the previous stack trace
-    // has free() in it.
-    const bool need_new_sblock =
-        G_flags->keep_history && G_flags->free_is_write;
-    if (need_new_sblock) {
-      thr->HandleSblockEnter(free_pc, /*allow_slow_path*/true);
-    }
-    ImitateWriteOnFree(thr, block, block_size, free_pc);
-  }
-
-  // Handles munmap() of the region starting at e->a() with length e->info().
-  // Only regions that exactly match a recorded mapping (same start and same
-  // size) are removed from G_heap_map and G_thread_stack_map.
-  void HandleMunmap(Event *e) {
-    // TODO(glider): at the moment we handle only munmap()s of single mmap()ed
-    // regions. The correct implementation should handle arbitrary munmap()s
-    // that may carve the existing mappings or split them into two parts.
-    // It should also be possible to munmap() several mappings at a time.
-    uintptr_t start = e->a();
-    if (start == 0) {
-      return;
-    }
-    HeapInfo *heap = G_heap_map->GetInfo(start);
-    uintptr_t length = e->info();
-
-    const bool heap_match =
-        heap && heap->ptr == start && heap->size == length;
-    if (heap_match) {
-      // TODO(glider): we may want to handle memory deletion and call
-      // Segment::Unref for all the unmapped memory.
-      Segment::Unref(heap->sid, __FUNCTION__);
-      G_heap_map->EraseRange(start, start + length);
-    }
-
-    ThreadStackInfo *stack = G_thread_stack_map->GetInfo(start);
-    const bool stack_match =
-        stack && stack->ptr == start && stack->size == length;
-    if (stack_match) {
-      G_thread_stack_map->EraseRange(start, start + length);
-    }
-  }
-
-  // Creates the TSanThread object for 'child_tid'.
-  // T0 and threads without a valid parent start with a singleton VTS; other
-  // threads get their VTS and creation context from the parent. If no call
-  // stack is supplied a fresh one is allocated. T0 starts with all accesses
-  // ignored until another thread comes.
-  void HandleThreadStart(TID child_tid, TID parent_tid, CallStack *call_stack) {
-    VTS *vts = NULL;
-    StackTrace *creation_context = NULL;
-    const bool is_main_thread = (child_tid == TID(0));
-
-    if (is_main_thread) {
-      // main thread, nothing else to set up.
-      vts = VTS::CreateSingleton(child_tid);
-    } else {
-      const bool has_parent = parent_tid.valid();
-      // Any thread other than T0 ends the "ignore accesses in T0" mode.
-      TSanThread::StopIgnoringAccessesInT0BecauseNewThreadStarted();
-      if (has_parent) {
-        TSanThread *parent = TSanThread::Get(parent_tid);
-        CHECK(parent);
-        parent->HandleChildThreadStart(child_tid, &vts, &creation_context);
-      } else {
-        Report("INFO: creating thread T%d w/o a parent\n", child_tid.raw());
-        vts = VTS::CreateSingleton(child_tid);
-      }
-    }
-
-    if (call_stack == NULL) {
-      call_stack = new CallStack();
-    }
-    TSanThread *created = new TSanThread(child_tid, parent_tid,
-                                         vts, creation_context, call_stack);
-    CHECK(created == TSanThread::Get(child_tid));
-    if (is_main_thread) {
-      created->set_ignore_all_accesses(true); // until a new thread comes.
-    }
-  }
-
-  // Runs before the first instruction of the thread, once the thread has been
-  // set up (e.g. its stack is in place). Only T0 needs work here: its stack
-  // bounds are queried, stored in the thread object, and the memory state of
-  // the stack range is cleared.
-  void HandleThreadFirstInsn(TID tid) {
-    // TODO(kcc): get rid of this once we find out how to get the T0's stack.
-    if (!(tid == TID(0))) return;
-
-    uintptr_t stack_min = 0;
-    uintptr_t stack_max = 0;
-    GetThreadStack(tid.raw(), &stack_min, &stack_max);
-    TSanThread *main_thread = TSanThread::Get(tid);
-    main_thread->SetStack(stack_min, stack_max);
-    ClearMemoryState(main_thread, main_thread->min_sp(), main_thread->max_sp());
-  }
-
-  // THR_STACK_TOP
-  // Establishes the stack range of the thread from its stack-top event.
-  // When the event carries a stack size (e->info()), the range is
-  // [sp - size, sp). Otherwise the range is taken from the entry that
-  // G_thread_stack_map returns for sp, if any. A non-empty range is
-  // sanity-checked, its memory state is cleared and it is stored in the thread.
-  void HandleThreadStackTop(Event *e) {
-    TID tid(e->tid());
-    TSanThread *thr = TSanThread::Get(tid);
-    uintptr_t sp = e->a();
-    uintptr_t lo = 0;
-    uintptr_t hi = 0;
-    uintptr_t known_size = e->info();
-
-    if (known_size) {
-      lo = sp - known_size;
-      hi = sp;
-    } else {
-      ThreadStackInfo *mapping = G_thread_stack_map->GetInfo(sp);
-      if (mapping != NULL) {
-        if (debug_thread) {
-          Printf("T%d %s: %p\n%s\n", e->tid(), __FUNCTION__,  sp,
-               reports_.DescribeMemory(sp).c_str());
-        }
-        lo = mapping->ptr;
-        hi = mapping->ptr + mapping->size;
-      }
-    }
-
-    if (debug_thread) {
-      Printf("T%d SP: %p [%p %p), size=%ldK\n",
-             e->tid(), sp, lo, hi, (hi - lo) >> 10);
-    }
-
-    if (!(lo < hi)) return;
-
-    // stay sane: between 8K and 128M.
-    CHECK((hi - lo) >= 8 * 1024);
-    CHECK((hi - lo) < 128 * 1024 * 1024);
-    ClearMemoryState(thr, lo, hi);
-    thr->SetStack(lo, hi);
-  }
-
-  // THR_END
-  // Handles the end of thread 'tid':
-  //  - folds the thread-local stats into the global ones and clears them;
-  //  - for threads other than T0, runs the thread's own end handler and
-  //    clears the memory state of its stack; for T0, marks the program as
-  //    finished in the report storage;
-  //  - warns if the thread ends with an 'ignore' bit still set (unless only
-  //    one thread has existed so far).
-  void HandleThreadEnd(TID tid) {
-    TSanThread *thr = TSanThread::Get(tid);
-    // Add the thread-local stats to global stats.
-    G_stats->Add(thr->stats);
-    thr->stats.Clear();
-
-    // Printf("HandleThreadEnd: %d\n", tid.raw());
-    if (tid != TID(0)) {
-      TSanThread *child = TSanThread::Get(tid);
-      child->HandleThreadEnd();
-
-
-      if (debug_thread) {
-        Printf("T%d:  THR_END     : %s %s\n", tid.raw(),
-               Segment::ToString(child->sid()).c_str(),
-               child->vts()->ToString().c_str());
-      }
-      ClearMemoryState(thr, child->min_sp(), child->max_sp());
-    } else {
-      reports_.SetProgramFinished();
-    }
-
-
-    if (g_so_far_only_one_thread == false
-        && (thr->ignore_reads() || thr->ignore_writes())) {
-      // The values are passed in the order of their labels: ignore_wr first,
-      // then ignore_rd (they used to be swapped).
-      Report("WARNING: T%d ended while at least one 'ignore' bit is set: "
-             "ignore_wr=%d ignore_rd=%d\n", tid.raw(),
-             thr->ignore_writes(), thr->ignore_reads());
-      for (int i = 0; i < 2; i++) {
-        StackTrace *context = thr->GetLastIgnoreContext(i);
-        if (context) {
-          Report("Last ignore_%s call was here: \n%s\n", i ? "wr" : "rd",
-                 context->ToString().c_str());
-        }
-      }
-      if (G_flags->save_ignore_context == false) {
-        Report("Rerun with --save_ignore_context to see where "
-               "IGNORE_END is missing\n");
-      }
-    }
-    ShowProcSelfStatus();
-  }
-
-  // THR_JOIN_AFTER
-  // Completes a join in the joining thread e->tid(): obtains the joined
-  // thread's VTS at exit from the thread object (e->a() is passed along as a
-  // TID) and starts a new segment of the joining thread that waits on it.
-  void HandleThreadJoinAfter(Event *e) {
-    TID joiner_tid(e->tid());
-    TSanThread *joiner = TSanThread::Get(joiner_tid);
-    VTS *child_exit_vts = NULL;
-    TID joined_tid =
-        joiner->HandleThreadJoinAfter(&child_exit_vts, TID(e->a()));
-    CHECK(child_exit_vts);
-    CHECK(joiner->sid().valid());
-    Segment::AssertLive(joiner->sid(),  __LINE__);
-    joiner->NewSegmentForWait(child_exit_vts);
-    if (!debug_thread) return;
-    Printf("T%d:  THR_JOIN_AFTER T%d  : %s\n", joiner_tid.raw(),
-           joined_tid.raw(), joiner->vts()->ToString().c_str());
-  }
-
- public:
-  // TODO(kcc): merge this into Detector class. (?)
-  ReportStorage reports_;
-
-  // Forwards the unwind callback 'cb' to the report storage (reports_).
-  void SetUnwindCallback(ThreadSanitizerUnwindCallback cb) {
-    reports_.SetUnwindCallback(cb);
-  }
-};
-
-static Detector        *G_detector;
-
-
-// Feeds one atomic operation on address 'a' to the detector as a single-mop
-// trace. Fences are ignored. Every operation other than a load is treated as
-// a store. Unless the memory order is tsan_memory_order_natomic, the
-// inside_atomic_op_ counter is raised for the duration of the call.
-void TSanThread::HandleAtomicMop(uintptr_t a,
-                             uintptr_t pc,
-                             tsan_atomic_op op,
-                             tsan_memory_order mo,
-                             size_t size) {
-  if (op == tsan_atomic_op_fence) {
-    return;
-  }
-  const bool is_store = !(op == tsan_atomic_op_load);
-  const bool track_nesting = (mo != tsan_memory_order_natomic);
-
-  CHECK(inside_atomic_op_ >= 0);
-  if (track_nesting) {
-    inside_atomic_op_ += 1;
-  }
-
-  MopInfo mop (pc, size, is_store, true);
-  G_detector->HandleTrace(this, &mop, 1, pc, &a, false);
-
-  if (track_nesting) {
-    inside_atomic_op_ -= 1;
-  }
-  CHECK(inside_atomic_op_ >= 0);
-}
-
-
-// -------- Flags ------------------------- {{{1
-const char *usage_str =
-"Usage:\n"
-"  %s [options] program_to_test [program's options]\n"
-"See %s for details\n";
-
-// Prints usage_str, filling in the program name and the documentation URL
-// taken from G_flags.
-void ThreadSanitizerPrintUsage() {
-  const char *program_name = G_flags->tsan_program_name.c_str();
-  const char *docs_url = G_flags->tsan_url.c_str();
-  Printf(usage_str, program_name, docs_url);
-}
-
-// Reports the unrecognized flag (or flag value) 'str', prints the usage text
-// and terminates the process with exit status 1. Does not return.
-static void ReportUnknownFlagAndExit(const string &str) {
-  Printf("Unknown flag or flag value: %s\n", str.c_str());
-  ThreadSanitizerPrintUsage();
-  exit(1);
-}
-
-// Checks whether the command-line argument 'arg' names the flag 'flag',
-// i.e. has the form "--flag" or "--flag=value". A '_' in the flag name
-// matches either '_' or '-' in the argument; all other characters must be
-// equal. On a match returns true and stores the text after '=' in *val
-// (the empty string when there is no "=value" part). *val is left untouched
-// when false is returned.
-static bool FlagNameMatch(const string &arg, const string &flag, string *val) {
-  const string expected = "--" + flag;
-  const size_t name_len = expected.size();
-  if (arg.size() < name_len) return false;
-
-  for (size_t pos = 0; pos != name_len; ++pos) {
-    const char want = expected[pos];
-    const char got = arg[pos];
-    const bool same = (want == '_') ? (got == '-' || got == '_')
-                                    : (want == got);
-    if (!same) return false;
-  }
-
-  // Bare "--flag": no value given.
-  if (arg.size() == name_len) {
-    val->clear();
-    return true;
-  }
-  // Otherwise the name must be followed directly by '='.
-  if (arg[name_len] != '=') return false;
-  *val = arg.substr(name_len + 1);
-  return true;
-}
-
-// Extracts every occurrence of the boolean flag 'name' from *args.
-// *retval starts as default_val and is overwritten by each occurrence in
-// order, so the last one wins. An empty value, "1", "true" and "yes" mean
-// true; "0", "false" and "no" mean false; anything else is reported as an
-// unknown flag value (which exits). Matching arguments are removed from
-// *args. Returns the number of occurrences found.
-static int FindBoolFlag(const char *name, bool default_val,
-                  vector<string> *args, bool *retval) {
-  int n_found = 0;
-  *retval = default_val;
-  vector<string>::iterator cur = args->begin();
-  while (cur != args->end()) {
-    string value;
-    if (!FlagNameMatch(*cur, name, &value)) {
-      ++cur;
-      continue;
-    }
-
-    const bool means_true = value == "" || value == "1" ||
-                            value == "true" || value == "yes";
-    const bool means_false = value == "0" || value == "false" ||
-                             value == "no";
-    if (means_true) {
-      *retval = true;
-    } else if (means_false) {
-      *retval = false;
-    } else {
-      ReportUnknownFlagAndExit(*cur);
-    }
-    n_found++;
-    if (G_flags->verbosity >= 1) {
-      Printf("%40s => %s\n", name, *retval ? "true" : "false");
-    }
-    // erase() hands back the element after the removed one, so the scan
-    // simply continues from there.
-    cur = args->erase(cur);
-  }
-  return n_found;
-}
-
-// Extracts every occurrence of the integer flag 'name' from *args.
-// *retval starts as default_val and is overwritten by each occurrence in
-// order. The value is parsed with my_strtol (base 0) and must be non-empty
-// and consumed entirely; otherwise the argument is reported as an unknown
-// flag value (which exits). Matching arguments are removed from *args.
-static void FindIntFlag(const char *name, intptr_t default_val,
-                 vector<string> *args, intptr_t *retval) {
-  *retval = default_val;
-  vector<string>::iterator cur = args->begin();
-  while (cur != args->end()) {
-    string text;
-    if (!FlagNameMatch(*cur, name, &text)) {
-      ++cur;
-      continue;
-    }
-
-    const char *first = text.c_str();
-    char *stop;
-    intptr_t parsed = my_strtol(first, &stop, 0);
-    const bool consumed_all = (first + text.size() == stop);
-    if (text.empty() || !consumed_all) {
-      ReportUnknownFlagAndExit(*cur);
-    }
-    *retval = parsed;
-    if (G_flags->verbosity >= 1) {
-      Printf("%40s => %ld\n", name, *retval);
-    }
-    cur = args->erase(cur);
-  }
-}
-
-// Unsigned variant of FindIntFlag: parses the flag as a signed integer,
-// CHECKs that the result is not negative and stores it in *retval.
-static void FindUIntFlag(const char *name, intptr_t default_val,
-                 vector<string> *args, uintptr_t *retval) {
-  intptr_t parsed;
-  FindIntFlag(name, default_val, args, &parsed);
-  CHECK_GE(parsed, 0);
-  *retval = parsed;
-}
-
-// Extracts every occurrence of the string flag 'name' from *args, appending
-// each value to *retval in the order the arguments appear. Matching
-// arguments are removed from *args.
-void FindStringFlag(const char *name, vector<string> *args,
-                    vector<string> *retval) {
-  vector<string>::iterator cur = args->begin();
-  while (cur != args->end()) {
-    string value;
-    if (!FlagNameMatch(*cur, name, &value)) {
-      ++cur;
-      continue;
-    }
-    retval->push_back(value);
-    if (G_flags->verbosity >= 1) {
-      Printf("%40s => %s\n", name, value.c_str());
-    }
-    cur = args->erase(cur);
-  }
-}
-
-// Single-value variant: removes all occurrences of the flag from *args and
-// stores the value of the last one in *retval. *retval is left unchanged if
-// the flag does not occur.
-void FindStringFlag(const char *name, vector<string> *args,
-                    string *retval) {
-  vector<string> values;
-  FindStringFlag(name, args, &values);
-  if (values.empty()) return;
-  *retval = values.back();
-}
-
-// Returns the "Max address space" limit from /proc/self/limits, converted
-// to megabytes (value >> 20). Returns 0 when the entry is missing, when its
-// value starts with 'u' ('unlimited'), or when not built for VGO_linux.
-static size_t GetMemoryLimitInMbFromProcSelfLimits() {
-#ifdef VGO_linux
-  string limits = ReadFileToString("/proc/self/limits", false);
-  const char *key = "Max address space";
-  size_t cursor = limits.find(key);
-  if (cursor == string::npos) {
-    return 0;
-  }
-  // Step over the key and the padding spaces that follow it.
-  cursor += strlen(key);
-  while (limits[cursor] == ' ') {
-    cursor++;
-  }
-  if (limits[cursor] == 'u') {
-    return 0;  // 'unlimited'.
-  }
-  char *end;
-  size_t bytes = my_strtol(limits.c_str() + cursor, &end, 0);
-  return bytes >> 20;
-#else
-  return 0;
-#endif
-}
-
-// Returns the memory limit in megabytes, or 0 if no limit is known.
-// The result is the smallest of: 3G on 32-bit VGO_linux builds, the non-zero
-// limit found in /proc/self/limits, and the value of the
-// VALGRIND_MEMORY_LIMIT_IN_MB environment variable when it is set.
-static size_t GetMemoryLimitInMb() {
-  const size_t kNoLimit = (size_t)-1;  // Maximum possible value.
-  size_t limit_mb = kNoLimit;
-#if defined(VGO_linux) && __WORDSIZE == 32
-  // Valgrind doesn't support more than 3G per process on 32-bit Linux.
-  limit_mb = 3 * 1024;
-#endif
-
-  // Try /proc/self/limits.
-  const size_t proc_limit = GetMemoryLimitInMbFromProcSelfLimits();
-  if (proc_limit != 0 && proc_limit < limit_mb) {
-    limit_mb = proc_limit;
-  }
-
-  // Try env.
-  const char *env_text =
-    (const char*)getenv("VALGRIND_MEMORY_LIMIT_IN_MB");
-  if (env_text) {
-    char *end;
-    const size_t env_limit = (size_t)my_strtol(env_text, &end, 0);
-    if (env_limit < limit_mb) {
-      limit_mb = env_limit;
-    }
-  }
-
-  return limit_mb == kNoLimit ? 0 : limit_mb;
-}
-
-// Returns true if 'phase_name' is one of the entries of G_flags->debug_phase.
-bool PhaseDebugIsOn(const char *phase_name) {
-  CHECK(G_flags);
-  size_t idx = 0;
-  while (idx < G_flags->debug_phase.size()) {
-    if (G_flags->debug_phase[idx] == phase_name) {
-      return true;
-    }
-    ++idx;
-  }
-  return false;
-}
-
-// Looks up every known ThreadSanitizer flag in |args| and stores the result
-// in G_flags. A handful of file-level globals are refreshed from the parsed
-// values as well: kMaxSID, kMaxSIDBeforeFlush, kSizeOfHistoryStackTrace,
-// g_race_verifier_active and the debug_* phase switches.
-//
-// Whatever is still left in |args| once all lookups are done is treated as an
-// unknown flag and handed to ReportUnknownFlagAndExit().
-// NOTE(review): this implies the Find*Flag helpers remove the entries they
-// match from |args| -- confirm against their definitions.
-//
-// The order of the lookups matters in a few places (e.g. "atomicity" overrides
-// pure_happens_before, "full_output" overrides several display flags), so do
-// not reorder the statements without checking those interactions.
-void ThreadSanitizerParseFlags(vector<string> *args) {
-#ifdef TS_OFFLINE
-  // Offline mode: the input type falls back to "str" when not given.
-  string input_type_tmp;
-  FindStringFlag("input_type", args, &input_type_tmp);
-  if (input_type_tmp.size() > 0) {
-    G_flags->input_type = input_type_tmp;
-  } else {
-    G_flags->input_type = "str";
-  }
-#endif
-
-  // Check this first.
-  FindIntFlag("v", 0, args, &G_flags->verbosity);
-
-  FindBoolFlag("ignore_stack", false, args, &G_flags->ignore_stack);
-  FindIntFlag("keep_history", 1, args, &G_flags->keep_history);
-  FindUIntFlag("segment_set_recycle_queue_size", DEBUG_MODE ? 10 : 10000, args,
-               &G_flags->segment_set_recycle_queue_size);
-  FindUIntFlag("recent_segments_cache_size", 10, args,
-               &G_flags->recent_segments_cache_size);
-
-  // Deprecated flags are still accepted (so they are not reported as unknown)
-  // but only produce an informational message.
-  bool fast_mode = false;
-  FindBoolFlag("fast_mode", false, args, &fast_mode);
-  if (fast_mode) {
-    Printf("INFO: --fast-mode is deprecated\n");
-  }
-  bool ignore_in_dtor = false;
-  FindBoolFlag("ignore_in_dtor", false, args, &ignore_in_dtor);
-  if (ignore_in_dtor) {
-    Printf("INFO: --ignore-in-dtor is deprecated\n");
-  }
-
-  // --pure-happens-before and --hybrid select the same setting from opposite
-  // directions. NOTE(review): has_phb/has_hyb are presumably non-zero when the
-  // flag was present on the command line -- confirm in FindBoolFlag.
-  int has_phb = FindBoolFlag("pure_happens_before", true, args,
-                              &G_flags->pure_happens_before);
-  bool hybrid = false;
-  int has_hyb = FindBoolFlag("hybrid", false, args, &hybrid);
-  if (has_hyb && has_phb) {
-    Printf("INFO: --hybrid and --pure-happens-before"
-           " is mutually exclusive; ignoring the --hybrid switch\n");
-  } else if (has_hyb && !has_phb) {
-    G_flags->pure_happens_before = !hybrid;
-  }
-
-  FindBoolFlag("show_expected_races", false, args,
-               &G_flags->show_expected_races);
-  FindBoolFlag("demangle", true, args, &G_flags->demangle);
-
-  FindBoolFlag("announce_threads", false, args, &G_flags->announce_threads);
-  FindBoolFlag("full_output", false, args, &G_flags->full_output);
-  FindBoolFlag("show_states", false, args, &G_flags->show_states);
-  FindBoolFlag("show_proc_self_status", false, args,
-               &G_flags->show_proc_self_status);
-  FindBoolFlag("show_valgrind_context", false, args,
-               &G_flags->show_valgrind_context);
-  FindBoolFlag("suggest_happens_before_arcs", true, args,
-               &G_flags->suggest_happens_before_arcs);
-  FindBoolFlag("show_pc", false, args, &G_flags->show_pc);
-  FindBoolFlag("full_stack_frames", false, args, &G_flags->full_stack_frames);
-  FindBoolFlag("free_is_write", true, args, &G_flags->free_is_write);
-  FindBoolFlag("exit_after_main", false, args, &G_flags->exit_after_main);
-
-  FindIntFlag("show_stats", 0, args, &G_flags->show_stats);
-  FindBoolFlag("trace_profile", false, args, &G_flags->trace_profile);
-  FindBoolFlag("color", false, args, &G_flags->color);
-  FindBoolFlag("html", false, args, &G_flags->html);
-#ifdef TS_OFFLINE
-  bool show_pid_default = false;
-#else
-  bool show_pid_default = true;
-#endif
-  FindBoolFlag("show_pid", show_pid_default, args, &G_flags->show_pid);
-  FindBoolFlag("save_ignore_context", DEBUG_MODE ? true : false, args,
-               &G_flags->save_ignore_context);
-
-  FindIntFlag("dry_run", 0, args, &G_flags->dry_run);
-  FindBoolFlag("report_races", true, args, &G_flags->report_races);
-  FindIntFlag("locking_scheme", 1, args, &G_flags->locking_scheme);
-  FindBoolFlag("unlock_on_mutex_destroy", true, args,
-               &G_flags->unlock_on_mutex_destroy);
-
-  FindIntFlag("sample_events", 0, args, &G_flags->sample_events);
-  FindIntFlag("sample_events_depth", 2, args, &G_flags->sample_events_depth);
-
-  FindIntFlag("debug_level", 1, args, &G_flags->debug_level);
-  FindStringFlag("debug_phase", args, &G_flags->debug_phase);
-  FindIntFlag("trace_level", 0, args, &G_flags->trace_level);
-
-  // "sampling" writes the same field as "literace_sampling".
-  // NOTE(review): how the second lookup interacts with a value set by the
-  // first depends on FindIntFlag's handling of the default -- confirm.
-  FindIntFlag("literace_sampling", 0, args, &G_flags->literace_sampling);
-  FindIntFlag("sampling", 0, args, &G_flags->literace_sampling);
-  CHECK(G_flags->literace_sampling < 32);
-  CHECK(G_flags->literace_sampling >= 0);
-  FindBoolFlag("start_with_global_ignore_on", false, args,
-               &G_flags->start_with_global_ignore_on);
-
-  // Both spellings feed the same list; every prefix is then normalized.
-  FindStringFlag("fullpath_after", args, &G_flags->file_prefix_to_cut);
-  FindStringFlag("file_prefix_to_cut", args, &G_flags->file_prefix_to_cut);
-  for (size_t i = 0; i < G_flags->file_prefix_to_cut.size(); i++) {
-    G_flags->file_prefix_to_cut[i] =
-        ConvertToPlatformIndependentPath(G_flags->file_prefix_to_cut[i]);
-  }
-
-  FindStringFlag("ignore", args, &G_flags->ignore);
-  FindStringFlag("whitelist", args, &G_flags->whitelist);
-  FindBoolFlag("ignore_unknown_pcs", false, args, &G_flags->ignore_unknown_pcs);
-
-  FindBoolFlag("thread_coverage", false, args, &G_flags->thread_coverage);
-
-  FindBoolFlag("atomicity", false, args, &G_flags->atomicity);
-  if (G_flags->atomicity) {
-    // When doing atomicity violation checking we should not
-    // create h-b arcs between Unlocks and Locks.
-    G_flags->pure_happens_before = false;
-  }
-
-  FindBoolFlag("call_coverage", false, args, &G_flags->call_coverage);
-  FindStringFlag("dump_events", args, &G_flags->dump_events);
-  FindBoolFlag("symbolize", true, args, &G_flags->symbolize);
-
-  FindIntFlag("trace_addr", 0, args,
-              reinterpret_cast<intptr_t*>(&G_flags->trace_addr));
-
-  FindIntFlag("max_mem_in_mb", 0, args, &G_flags->max_mem_in_mb);
-  FindBoolFlag("offline", false, args, &G_flags->offline);
-  FindBoolFlag("attach_mode", false, args, &G_flags->attach_mode);
-  // 0 means "not specified": fall back to GetMemoryLimitInMb().
-  if (G_flags->max_mem_in_mb == 0) {
-    G_flags->max_mem_in_mb = GetMemoryLimitInMb();
-  }
-
-  // For single-valued file flags the last occurrence wins.
-  vector<string> summary_file_tmp;
-  FindStringFlag("summary_file", args, &summary_file_tmp);
-  if (summary_file_tmp.size() > 0) {
-    G_flags->summary_file = summary_file_tmp.back();
-  }
-
-  vector<string> log_file_tmp;
-  FindStringFlag("log_file", args, &log_file_tmp);
-  if (log_file_tmp.size() > 0) {
-    G_flags->log_file = log_file_tmp.back();
-  }
-
-  G_flags->tsan_program_name = "valgrind --tool=tsan";
-  FindStringFlag("tsan_program_name", args, &G_flags->tsan_program_name);
-
-  G_flags->tsan_url = "http://code.google.com/p/data-race-test";
-  FindStringFlag("tsan_url", args, &G_flags->tsan_url);
-
-  FindStringFlag("suppressions", args, &G_flags->suppressions);
-  FindBoolFlag("gen_suppressions", false, args,
-               &G_flags->generate_suppressions);
-
-  FindIntFlag("error_exitcode", 0, args, &G_flags->error_exitcode);
-  FindIntFlag("flush_period", 0, args, &G_flags->flush_period);
-  FindBoolFlag("trace_children", false, args, &G_flags->trace_children);
-
-  FindIntFlag("max_sid", kMaxSID, args, &G_flags->max_sid);
-  kMaxSID = G_flags->max_sid;
-  // The message promises that exactly 100000 is acceptable, so reject only
-  // values strictly below it (the check used to be "<=", which contradicted
-  // the message).
-  if (kMaxSID < 100000) {
-    Printf("Error: max-sid should be at least 100000. Exiting\n");
-    exit(1);
-  }
-  // The default flush threshold is 15/16 of the (possibly overridden) max.
-  FindIntFlag("max_sid_before_flush", (kMaxSID * 15) / 16, args,
-              &G_flags->max_sid_before_flush);
-  kMaxSIDBeforeFlush = G_flags->max_sid_before_flush;
-
-  FindIntFlag("num_callers_in_history", kSizeOfHistoryStackTrace, args,
-              &G_flags->num_callers_in_history);
-  kSizeOfHistoryStackTrace = G_flags->num_callers_in_history;
-
-  // Cut stack under the following default functions.
-  G_flags->cut_stack_below.push_back("TSanThread*ThreadBody*");
-  G_flags->cut_stack_below.push_back("ThreadSanitizerStartThread");
-  G_flags->cut_stack_below.push_back("start_thread");
-  G_flags->cut_stack_below.push_back("BaseThreadInitThunk");
-  FindStringFlag("cut_stack_below", args, &G_flags->cut_stack_below);
-
-  FindIntFlag("num_callers", 16, args, &G_flags->num_callers);
-
-  // Not configurable from the command line.
-  G_flags->max_n_threads        = 100000;
-
-  // --full-output forces the most verbose display settings and drops any
-  // file prefix cutting requested above.
-  if (G_flags->full_output) {
-    G_flags->announce_threads = true;
-    G_flags->show_pc = true;
-    G_flags->full_stack_frames = true;
-    G_flags->show_states = true;
-    G_flags->file_prefix_to_cut.clear();
-  }
-
-  // Race Verifier mode is active as soon as either list is non-empty.
-  FindIntFlag("race_verifier_sleep_ms", 100, args,
-      &G_flags->race_verifier_sleep_ms);
-  FindStringFlag("race_verifier", args, &G_flags->race_verifier);
-  FindStringFlag("race_verifier_extra", args, &G_flags->race_verifier_extra);
-  g_race_verifier_active =
-      !(G_flags->race_verifier.empty() && G_flags->race_verifier_extra.empty());
-  if (g_race_verifier_active) {
-    Printf("INFO: ThreadSanitizer running in Race Verifier mode.\n");
-  }
-
-  FindBoolFlag("nacl_untrusted", false, args, &G_flags->nacl_untrusted);
-  FindBoolFlag("threaded_analysis", false, args, &G_flags->threaded_analysis);
-
-  FindBoolFlag("sched_shake", false, args, &G_flags->sched_shake);
-  FindBoolFlag("api_ambush", false, args, &G_flags->api_ambush);
-
-  FindBoolFlag("enable_atomic", false, args, &G_flags->enable_atomic);
-
-  // Every known flag has been looked up; anything left over is unknown.
-  if (!args->empty()) {
-    ReportUnknownFlagAndExit(args->front());
-  }
-
-  // Cache the per-phase debug switches derived from --debug-phase.
-  debug_expected_races = PhaseDebugIsOn("expected_races");
-  debug_benign_races = PhaseDebugIsOn("benign_races");
-  debug_malloc = PhaseDebugIsOn("malloc");
-  debug_free = PhaseDebugIsOn("free");
-  debug_thread = PhaseDebugIsOn("thread");
-  debug_ignore = PhaseDebugIsOn("ignore");
-  debug_rtn = PhaseDebugIsOn("rtn");
-  debug_lock = PhaseDebugIsOn("lock");
-  debug_wrap = PhaseDebugIsOn("wrap");
-  debug_ins = PhaseDebugIsOn("ins");
-  debug_shadow_stack = PhaseDebugIsOn("shadow_stack");
-  debug_happens_before = PhaseDebugIsOn("happens_before");
-  debug_cache = PhaseDebugIsOn("cache");
-  debug_race_verifier = PhaseDebugIsOn("race_verifier");
-  debug_atomic = PhaseDebugIsOn("atomic");
-}
-
-// -------- ThreadSanitizer ------------------ {{{1
-
-// Sets up the lists of functions/images/files to ignore.
-//
-// Allocates g_ignore_lists and g_white_lists, fills g_ignore_lists with the
-// built-in entries below and then appends whatever is found in the files named
-// by the --ignore and --whitelist flags (G_flags->ignore / G_flags->whitelist).
-//
-// Three lists are populated here:
-//   ignores      - consulted by ThreadSanitizerWantToInstrumentSblock();
-//   ignores_r    - consulted by ThreadSanitizerWantToInstrumentSblock() and
-//                  ThreadSanitizerIgnoreAccessesBelowFunction() ("fun_r");
-//   ignores_hist - consulted by
-//                  ThreadSanitizerWantToCreateSegmentsOnSblockEntry().
-static void SetupIgnore() {
-  g_ignore_lists = new IgnoreLists;
-  g_white_lists = new IgnoreLists;
-
-  // Add some major ignore entries so that tsan remains sane
-  // even w/o any ignore file. First - for all platforms.
-  g_ignore_lists->ignores.push_back(IgnoreFun("ThreadSanitizerStartThread"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("exit"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("longjmp"));
-
-  // Dangerous: recursively ignoring vfprintf hides races on printf arguments.
-  // See PrintfTests in unittest/racecheck_unittest.cc
-  // TODO(eugenis): Do something about this.
-  // http://code.google.com/p/data-race-test/issues/detail?id=53
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("vfprintf"));
-
-  // Do not create segments in our Replace_* functions.
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_memcpy"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_memchr"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strcpy"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strchr"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strchrnul"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strrchr"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strlen"));
-  g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strcmp"));
-
-  // Ignore everything in our own file.
-  g_ignore_lists->ignores.push_back(IgnoreFile("*ts_valgrind_intercepts.c"));
-
-#ifndef _MSC_VER
-  // POSIX ignores (every non-MSVC build).
-  g_ignore_lists->ignores.push_back(IgnoreObj("*/libpthread*"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*/ld-2*.so"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create@*"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create_WRK"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("__cxa_*"));
-  g_ignore_lists->ignores.push_back(
-      IgnoreFun("*__gnu_cxx*__exchange_and_add*"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("__lll_mutex_*"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("__lll_*lock_*"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("__fprintf_chk"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("_IO_file_xsputn*"));
-  // fflush internals
-  g_ignore_lists->ignores.push_back(IgnoreFun("_IO_adjust_column"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("_IO_flush_all_lockp"));
-
-  // setjmp/longjmp machinery.
-  g_ignore_lists->ignores.push_back(IgnoreFun("__sigsetjmp"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("__sigjmp_save"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("_setjmp"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("_longjmp_unwind"));
-
-  g_ignore_lists->ignores.push_back(IgnoreFun("__mktime_internal"));
-
-  // http://code.google.com/p/data-race-test/issues/detail?id=40
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("_ZNSsD1Ev"));
-
-  // Name-resolution functions: ignored recursively.
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("gaih_inet"));
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("getaddrinfo"));
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("gethostbyname2_r"));
-
-  #ifdef VGO_darwin
-    // Mac-only ignores
-    g_ignore_lists->ignores.push_back(IgnoreObj("/usr/lib/dyld"));
-    g_ignore_lists->ignores.push_back(IgnoreObj("/usr/lib/libobjc.A.dylib"));
-    g_ignore_lists->ignores.push_back(IgnoreObj("*/libSystem.*.dylib"));
-    g_ignore_lists->ignores_r.push_back(IgnoreFun("__CFDoExternRefOperation"));
-    g_ignore_lists->ignores_r.push_back(IgnoreFun("_CFAutoreleasePoolPop"));
-    g_ignore_lists->ignores_r.push_back(IgnoreFun("_CFAutoreleasePoolPush"));
-    g_ignore_lists->ignores_r.push_back(IgnoreFun("OSAtomicAdd32"));
-    g_ignore_lists->ignores_r.push_back(IgnoreTriple("_dispatch_Block_copy",
-                                            "/usr/lib/libSystem.B.dylib", "*"));
-
-    // pthread_lib_{enter,exit} shouldn't give us any reports since they
-    // have IGNORE_ALL_ACCESSES_BEGIN/END but they do give the reports...
-    g_ignore_lists->ignores_r.push_back(IgnoreFun("pthread_lib_enter"));
-    g_ignore_lists->ignores_r.push_back(IgnoreFun("pthread_lib_exit"));
-  #endif
-#else
-  // Windows-only ignores
-  g_ignore_lists->ignores.push_back(IgnoreObj("*ole32.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*OLEAUT32.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*MSCTF.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*ntdll.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*mswsock.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*WS2_32.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*msvcrt.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*kernel32.dll"));
-  g_ignore_lists->ignores.push_back(IgnoreObj("*ADVAPI32.DLL"));
-
-  g_ignore_lists->ignores.push_back(IgnoreFun("_EH_epilog3"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("_EH_prolog3_catch"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("unnamedImageEntryPoint"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("_Mtxunlock"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("IsNLSDefinedString"));
-
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("RtlDestroyQueryDebugBuffer"));
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("BCryptGenerateSymmetricKey"));
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("SHGetItemFromDataObject"));
-
-  // http://code.google.com/p/data-race-test/issues/detail?id=53
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("_stbuf"));
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("_getptd"));
-
-  // TODO(timurrrr): Add support for FLS (fiber-local-storage)
-  // http://code.google.com/p/data-race-test/issues/detail?id=55
-  g_ignore_lists->ignores_r.push_back(IgnoreFun("_freefls"));
-#endif
-
-#ifdef ANDROID
-  // Android does not have a libpthread; pthread_* functions live in libc.
-  // The "*/libpthread*" object entry above cannot cover them, so they are
-  // matched by function-name pattern instead.
-  g_ignore_lists->ignores.push_back(IgnoreFun("pthread_*"));
-  g_ignore_lists->ignores.push_back(IgnoreFun("__init_tls"));
-#endif
-
-  // Now read the ignore/whitelist files; their entries are added on top of
-  // the built-in ones above.
-  for (size_t i = 0; i < G_flags->ignore.size(); i++) {
-    string file_name = G_flags->ignore[i];
-    Report("INFO: Reading ignore file: %s\n", file_name.c_str());
-    string str = ReadFileToString(file_name, true);
-    ReadIgnoresFromString(str, g_ignore_lists);
-  }
-  for (size_t i = 0; i < G_flags->whitelist.size(); i++) {
-    string file_name = G_flags->whitelist[i];
-    Report("INFO: Reading whitelist file: %s\n", file_name.c_str());
-    string str = ReadFileToString(file_name, true);
-    ReadIgnoresFromString(str, g_white_lists);
-  }
-}
-
-// Forwards the unwind callback |cb| to the global detector (G_detector).
-// G_detector is created in ThreadSanitizerInit(), so this must not be called
-// before initialization.
-void ThreadSanitizerSetUnwindCallback(ThreadSanitizerUnwindCallback cb) {
-  G_detector->SetUnwindCallback(cb);
-}
-
-// Records the NaCl untrusted memory region as the half-open address range
-// [mem_start, mem_end); see AddrIsInNaclUntrustedRegion() for the lookup.
-void ThreadSanitizerNaclUntrustedRegion(uintptr_t mem_start, uintptr_t mem_end) {
-  g_nacl_mem_end = mem_end;
-  g_nacl_mem_start = mem_start;
-}
-
-// Returns true iff |addr| lies inside the recorded NaCl untrusted region,
-// i.e. g_nacl_mem_start <= addr < g_nacl_mem_end.
-bool AddrIsInNaclUntrustedRegion(uintptr_t addr) {
-  if (addr < g_nacl_mem_start) {
-    return false;  // Below the region.
-  }
-  return addr < g_nacl_mem_end;
-}
-
-// Decides whether |addr| is out of scope for the current NaCl tracing mode.
-// With --nacl-untrusted set, addresses outside the untrusted region are
-// ignored; without it, addresses inside the untrusted region are ignored.
-bool ThreadSanitizerIgnoreForNacl(uintptr_t addr) {
-  const bool in_untrusted_region = AddrIsInNaclUntrustedRegion(addr);
-  return in_untrusted_region != G_flags->nacl_untrusted;
-}
-
-// Returns true if the superblock starting at |pc| should be instrumented.
-//
-// The decision is made in this order:
-//   1. If a whitelist is present, anything that does not match it is rejected.
-//   2. With --ignore-unknown-pcs, functions without symbols are rejected.
-//   3. Anything matching the "ignores" or "ignores_r" lists is rejected.
-//   4. Anything on the wrong side of the NaCl trusted/untrusted boundary
-//      (see ThreadSanitizerIgnoreForNacl) is rejected.
-bool ThreadSanitizerWantToInstrumentSblock(uintptr_t pc) {
-  string img_name, rtn_name, file_name;
-  int line_no;
-  G_stats->pc_to_strings++;
-  PcToStrings(pc, false, &img_name, &rtn_name, &file_name, &line_no);
-
-  if (g_white_lists->ignores.size() > 0) {
-    bool in_white_list = TripleVectorMatchKnown(g_white_lists->ignores,
-                                                rtn_name, img_name, file_name);
-    if (in_white_list) {
-      if (debug_ignore) {
-        Report("INFO: Whitelisted rtn: %s\n", rtn_name.c_str());
-      }
-    } else {
-      return false;
-    }
-  }
-
-  if (G_flags->ignore_unknown_pcs && rtn_name == "(no symbols)") {
-    if (debug_ignore) {
-      // %p expects a pointer argument, so pc is cast explicitly.
-      Report("INFO: not instrumenting unknown function at %p\n", (void*)pc);
-    }
-    return false;
-  }
-
-  bool ignore = TripleVectorMatchKnown(g_ignore_lists->ignores,
-                                       rtn_name, img_name, file_name) ||
-                TripleVectorMatchKnown(g_ignore_lists->ignores_r,
-                                       rtn_name, img_name, file_name);
-  if (debug_ignore) {
-    // Note: "ret" printed here reflects only the ignore lists, not the NaCl
-    // check performed below.
-    Printf("%s: pc=%p file_name=%s img_name=%s rtn_name=%s ret=%d\n",
-           __FUNCTION__, (void*)pc, file_name.c_str(), img_name.c_str(),
-           rtn_name.c_str(), !ignore);
-  }
-  bool nacl_ignore = ThreadSanitizerIgnoreForNacl(pc);
-  return !(ignore || nacl_ignore);
-}
-
-// Returns true if a new segment should be created on entry to the superblock
-// at |pc|: history must be enabled (--keep-history != 0) and the routine must
-// not be on the "ignores_hist" list.
-bool ThreadSanitizerWantToCreateSegmentsOnSblockEntry(uintptr_t pc) {
-  const string routine = PcToRtnName(pc, false);
-  if (G_flags->keep_history == 0) {
-    return false;
-  }
-  const bool history_ignored =
-      TripleVectorMatchKnown(g_ignore_lists->ignores_hist, routine, "", "");
-  return !history_ignored;
-}
-
-// Returns true if function at "pc" is marked as "fun_r" in the ignore file
-// (i.e. it matches g_ignore_lists->ignores_r).
-//
-// Answers are memoized per pc in a function-local map that is only touched
-// while ts_ignore_below_lock is held. The lock is dropped while the function
-// name is resolved and matched, and re-taken to store the answer.
-bool NOINLINE ThreadSanitizerIgnoreAccessesBelowFunction(uintptr_t pc) {
-  ScopedMallocCostCenter cc(__FUNCTION__);
-  typedef unordered_map<uintptr_t, bool> Cache;
-  static Cache *cache = NULL;
-  {
-    TIL ignore_below_lock(ts_ignore_below_lock, 18);
-    if (!cache)
-      cache = new Cache;
-
-    // Fast path - check if we already know the answer.
-    Cache::iterator i = cache->find(pc);
-    if (i != cache->end())
-      return i->second;
-  }
-
-  // Slow path: resolve the name and match it without holding the lock.
-  string rtn_name = PcToRtnName(pc, false);
-  bool ret =
-      TripleVectorMatchKnown(g_ignore_lists->ignores_r, rtn_name, "", "");
-
-  if (DEBUG_MODE) {
-    // Heavy test for NormalizeFunctionName: test on all possible inputs in
-    // debug mode. TODO(timurrrr): Remove when tested.
-    NormalizeFunctionName(PcToRtnName(pc, true));
-  }
-
-  // Grab the lock again
-  TIL ignore_below_lock(ts_ignore_below_lock, 19);
-  if (ret && debug_ignore) {
-    // %p expects a pointer argument, so pc is cast explicitly.
-    Report("INFO: ignoring all accesses below the function '%s' (%p)\n",
-           PcToRtnNameAndFilePos(pc).c_str(), (void*)pc);
-  }
-  return ((*cache)[pc] = ret);
-}
-
-// We intercept a user function with this name
-// and answer the user query with a non-NULL string.
-//
-// Recognized queries:
-//   "pure_happens_before" -> "1" if G_flags->pure_happens_before is set;
-//   "hybrid_full"         -> "1" if G_flags->pure_happens_before is not set;
-//   "race_verifier"       -> "1" if Race Verifier mode is active;
-//   "trace-level=0" / "trace-level=1" -> switch the trace level (and
-//       debug_happens_before) at run time; the answer stays "0".
-// Every other query, including a NULL one, is answered with "0".
-extern "C" const char *ThreadSanitizerQuery(const char *query) {
-  const char *ret = "0";
-  // The query comes from user code. Constructing a string from a NULL pointer
-  // is undefined behavior, so a NULL query is treated as an empty one.
-  string str(query ? query : "");
-  if (str == "pure_happens_before" && G_flags->pure_happens_before == true) {
-    ret = "1";
-  }
-  if (str == "hybrid_full" &&
-      G_flags->pure_happens_before == false) {
-    ret = "1";
-  }
-  if (str == "race_verifier" && g_race_verifier_active == true) {
-    ret = "1";
-  }
-  if (DEBUG_MODE && G_flags->debug_level >= 2) {
-    Printf("ThreadSanitizerQuery(\"%s\") = \"%s\"\n", str.c_str(), ret);
-  }
-  if (str == "trace-level=0") {
-    Report("INFO: trace-level=0\n");
-    G_flags->trace_level = 0;
-    debug_happens_before = false;
-  }
-  if (str == "trace-level=1") {
-    Report("INFO: trace-level=1\n");
-    G_flags->trace_level = 1;
-    debug_happens_before = true;
-  }
-  return ret;
-}
-
-// One-time initialization of the ThreadSanitizer runtime: creates the global
-// locks, statistics, ignore lists, detector and the various global maps,
-// initializes the static members of the core classes and selects the color
-// scheme used in reports.
-// G_flags must already be set up (see the CHECK below), so flag parsing has
-// to happen before this call.
-extern void ThreadSanitizerInit() {
-  ScopedMallocCostCenter cc("ThreadSanitizerInit");
-  ts_lock = new TSLock;
-  ts_ignore_below_lock = new TSLock;
-  g_so_far_only_one_thread = true;
-  ANNOTATE_BENIGN_RACE(&g_so_far_only_one_thread, "real benign race");
-  CHECK_EQ(sizeof(ShadowValue), 8);
-  CHECK(G_flags);
-  G_stats        = new Stats;
-  // Reads G_flags->ignore / G_flags->whitelist.
-  SetupIgnore();
-
-  G_detector     = new Detector;
-  G_cache        = new Cache;
-  G_expected_races_map = new ExpectedRacesMap;
-  G_heap_map           = new HeapMap<HeapInfo>;
-  G_thread_stack_map   = new HeapMap<ThreadStackInfo>;
-  {
-    // Separate cost center so that this allocation is accounted on its own.
-    ScopedMallocCostCenter cc1("Segment::InitClassMembers");
-    Segment::InitClassMembers();
-  }
-  SegmentSet::InitClassMembers();
-  CacheLine::InitClassMembers();
-  TSanThread::InitClassMembers();
-  Lock::InitClassMembers();
-  LockSet::InitClassMembers();
-  EventSampler::InitClassMembers();
-  VTS::InitClassMembers();
-  // TODO(timurrrr): make sure *::InitClassMembers() are called only once for
-  // each class
-  g_publish_info_map = new PublishInfoMap;
-  g_stack_trace_free_list = new StackTraceFreeList;
-  g_pcq_map = new PCQMap;
-  g_atomicCore = new TsanAtomicCore();
-
-
-  // Report coloring: --html takes precedence over --color; with neither flag
-  // the c_* strings keep whatever value they had before this call.
-  if (G_flags->html) {
-    c_bold    = "<font ><b>";
-    c_red     = "<font color=red><b>";
-    c_green   = "<font color=green><b>";
-    c_magenta = "<font color=magenta><b>";
-    c_cyan    = "<font color=cyan><b>";
-    c_blue   = "<font color=blue><b>";
-    c_yellow  = "<font color=yellow><b>";
-    c_default = "</b></font>";
-  } else if (G_flags->color) {
-    // Enable ANSI colors.
-    c_bold    = "\033[1m";
-    c_red     = "\033[31m";
-    c_green   = "\033[32m";
-    c_yellow  = "\033[33m";
-    c_blue    = "\033[34m";
-    c_magenta = "\033[35m";
-    c_cyan    = "\033[36m";
-    c_default = "\033[0m";
-  }
-
-  if (G_flags->verbosity >= 1) {
-    Report("INFO: Started pid %d\n",  getpid());
-  }
-  if (G_flags->start_with_global_ignore_on) {
-    global_ignore = true;
-    Report("INFO: STARTING WITH GLOBAL IGNORE ON\n");
-  }
-  ANNOTATE_BENIGN_RACE(&g_lock_era,
-                       "g_lock_era may be incremented in a racey way");
-}
-
-// Shutdown hook: tells the global detector that the program has ended.
-extern void ThreadSanitizerFini() {
-  G_detector->HandleProgramEnd();
-}
-
-// Reports the stack trace of every known thread: all running threads first,
-// followed by all threads that are no longer running.
-extern void ThreadSanitizerDumpAllStacks() {
-  // Pass 1: threads that are still running.
-  for (int tid = 0; tid < TSanThread::NumberOfThreads(); tid++) {
-    TSanThread *thr = TSanThread::Get(TID(tid));
-    if (thr && thr->is_running()) {
-      Report("T%d\n", tid);
-      thr->ReportStackTrace();
-    }
-  }
-  // Pass 2: threads that are known but not running any more.
-  for (int tid = 0; tid < TSanThread::NumberOfThreads(); tid++) {
-    TSanThread *thr = TSanThread::Get(TID(tid));
-    if (thr && !thr->is_running()) {
-      Report("T%d (not running)\n", tid);
-      thr->ReportStackTrace();
-    }
-  }
-}
-
-
-// Feeds a single event |e| to the global detector.
-extern void ThreadSanitizerHandleOneEvent(Event *e) {
-  // Lock is inside on some paths.
-  G_detector->HandleOneEvent(e);
-}
-
-// Maps a raw thread id onto the corresponding TSanThread object (the result
-// of TSanThread::Get for that id).
-TSanThread *ThreadSanitizerGetThreadByTid(int32_t tid) {
-  TSanThread *thr = TSanThread::Get(TID(tid));
-  return thr;
-}
-
-// Tid-based entry point: resolves |tid| to its TSanThread and forwards to the
-// TSanThread* overload of ThreadSanitizerHandleTrace.
-extern NOINLINE void ThreadSanitizerHandleTrace(int32_t tid, TraceInfo *trace_info,
-                                       uintptr_t *tleb) {
-  TSanThread *thr = TSanThread::Get(TID(tid));
-  ThreadSanitizerHandleTrace(thr, trace_info, tleb);
-}
-// Passes one trace (the memory operations described by |trace_info|, with
-// |tleb| as the accompanying buffer) executed by |thr| to the detector.
-// |thr| must not be NULL.
-extern NOINLINE void ThreadSanitizerHandleTrace(TSanThread *thr, TraceInfo *trace_info,
-                                                uintptr_t *tleb) {
-  DCHECK(thr);
-  // The detector takes the lock itself, on the slow path only.
-  const bool need_locking = true;
-  G_detector->HandleTrace(thr, trace_info->mops(), trace_info->n_mops(),
-                          trace_info->pc(), tleb, need_locking);
-}
-
-// Handles a single memory access |mop| at address |addr| made by |thr| by
-// presenting it to the detector as a one-element trace. The trace pc is
-// mop.pc() when the mop asks for a new superblock and 0 otherwise.
-// |thr| must not be NULL.
-extern NOINLINE void ThreadSanitizerHandleOneMemoryAccess(TSanThread *thr,
-                                                          MopInfo mop,
-                                                          uintptr_t addr) {
-  DCHECK(thr);
-  const bool need_locking = true;
-  G_detector->HandleTrace(thr, &mop, 1,
-                          mop.create_sblock() ? mop.pc() : 0,
-                          &addr, need_locking);
-}
-
-// Notifies the detector that thread |tid| calls the routine at |target_pc|
-// from |call_pc|. When --sample-events is on, the call is also recorded by a
-// function-local event sampler.
-void NOINLINE ThreadSanitizerHandleRtnCall(int32_t tid, uintptr_t call_pc,
-                                         uintptr_t target_pc,
-                                         IGNORE_BELOW_RTN ignore_below) {
-  // This does locking on a cold path. Hot path in thread-local.
-  G_detector->HandleRtnCall(TID(tid), call_pc, target_pc, ignore_below);
-
-  if (!G_flags->sample_events) {
-    return;
-  }
-  static EventSampler sampler;
-  sampler.Sample(TSanThread::Get(TID(tid)), "RTN_CALL", true);
-}
-// Notifies thread |tid| that it returns from the current routine.
-void NOINLINE ThreadSanitizerHandleRtnExit(int32_t tid) {
-  // This is a thread-local operation, no need for locking.
-  TSanThread *thr = TSanThread::Get(TID(tid));
-  thr->HandleRtnExit();
-}
-
-// Prints |report| through the detector's report storage and returns the
-// result of that call.
-static bool ThreadSanitizerPrintReport(ThreadSanitizerReport *report) {
-  const bool result = G_detector->reports_.PrintReport(report);
-  return result;
-}
-
-
-// -------- TsanAtomicImplementation ------------------ {{{1
-
-// Atomic operation handler.
-// The idea of atomic handling is as simple as follows.
-// * First, we handle it as normal memory access,
-//     however with race reporting suppressed. That is, we won't produce any
-//     race reports during atomic access, but we can produce race reports
-//     later during normal memory accesses that race with the access.
-// * Then, we do the actual atomic memory access.
-//     It's executed in an atomic fashion, because there can be simultaneous
-//     atomic accesses from non-instrumented code (FUTEX_OP is a notable
-//     example).
-// * Finally, we update simulated memory model state according to
-//     the access type and associated memory order as follows.
-//     For writes and RMWs we create a new entry in the modification order
-//     of the variable. For reads we scan the modification order starting
-//     from the latest entry and going back in time, during the scan we decide
-//     what entry the read returns. A separate VTS (happens-before edges)
-//     is associated with each entry in the modification order, so that a load
-//     acquires memory visibility from the exact release-sequence associated
-//     with the loaded value.
-// For details of memory modelling refer to sections 1.10 and 29
-//     of C++0x standard:
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
-// Parameters:
-//   tid      - raw id of the thread performing the operation;
-//   pc       - program counter of the operation;
-//   op       - kind of atomic operation (load, fence, compare_exchange, ...);
-//   mo       - memory order of the operation;
-//   fail_mo  - memory order used when a compare_exchange fails;
-//   size     - access size, forwarded to tsan_atomic_do_op;
-//   a        - address of the atomic variable;
-//   v, cmp   - operands, forwarded to tsan_atomic_do_op.
-// Returns the value produced by tsan_atomic_do_op; for loads with
-// --enable-atomic it is replaced by the value chosen by the simulated
-// memory model (TsanAtomicCore::HandleRead).
-uint64_t ThreadSanitizerHandleAtomicOp(int32_t tid,
-                                       uintptr_t pc,
-                                       tsan_atomic_op op,
-                                       tsan_memory_order mo,
-                                       tsan_memory_order fail_mo,
-                                       size_t size,
-                                       void volatile* a,
-                                       uint64_t v,
-                                       uint64_t cmp) {
-  if (G_flags->enable_atomic == false) {
-    // Atomic modelling is off: just perform the operation, no bookkeeping.
-    uint64_t newv = 0;
-    uint64_t prev = 0;
-    return tsan_atomic_do_op(op, mo, fail_mo, size, a, v, cmp, &newv, &prev);
-  } else {
-    uint64_t rv = 0;
-    TSanThread* thr = TSanThread::Get(TID(tid));
-    // Just a verification of the parameters.
-    tsan_atomic_verify(op, mo, fail_mo, size, a);
-
-    {
-      // Everything in this scope runs under the global ts_lock.
-      TIL til(ts_lock, 0);
-      uint64_t newv = 0;
-      uint64_t prev = 0;
-      // Handle it as a plain mop. Race reports are temporarily suppressed,
-      // though.
-      thr->HandleAtomicMop((uintptr_t)a, pc, op, mo, size);
-      // Do the actual atomic operation. It's executed in an atomic fashion,
-      // because there can be simultaneous atomic accesses
-      // from non-instrumented code.
-      rv = tsan_atomic_do_op(op, mo, fail_mo, size, a, v, cmp, &newv, &prev);
-
-      PrintfIf(debug_atomic, "rv=%llu, newv=%llu, prev=%llu\n",
-               (unsigned long long)rv,
-               (unsigned long long)newv,
-               (unsigned long long)prev);
-
-      // Fences touch no variable, so there is no modification order to update.
-      if (op != tsan_atomic_op_fence) {
-        if (op == tsan_atomic_op_load) {
-          // For reads it replaces the return value with a random value
-          // from visible sequence of side-effects in the modification order
-          // of the variable.
-          rv = g_atomicCore->HandleRead(thr, (uintptr_t)a, rv,
-                                        tsan_atomic_is_acquire(mo));
-        } else if ((op == tsan_atomic_op_compare_exchange_weak
-            || op == tsan_atomic_op_compare_exchange_strong)
-            && cmp != rv) {
-          // Failed compare_exchange is handled as read, because, well,
-          // it's indeed just a read (at least logically).
-          // NOTE(review): "cmp != rv" as the failure test assumes that
-          // tsan_atomic_do_op returns the value observed in memory for
-          // compare_exchange -- confirm. The HandleRead result is not used.
-          g_atomicCore->HandleRead(thr, (uintptr_t)a, rv,
-                                   tsan_atomic_is_acquire(fail_mo));
-        } else {
-          // For writes and RMW operations it updates modification order
-          // of the atomic variable.
-          g_atomicCore->HandleWrite(thr, (uintptr_t)a, newv, prev,
-                                    tsan_atomic_is_acquire(mo),
-                                    tsan_atomic_is_release(mo),
-                                    tsan_atomic_is_rmw(op));
-        }
-      }
-    }
-
-    PrintfIf(debug_atomic, "ATOMIC: %s-%s %p (%llu,%llu)=%llu\n",
-             tsan_atomic_to_str(op),
-             tsan_atomic_to_str(mo),
-             a, (unsigned long long)v, (unsigned long long)cmp,
-             (unsigned long long)rv);
-
-    return rv;
-  }
-}
-
-
-// Nothing to set up explicitly; members are default-constructed.
-TsanAtomicCore::TsanAtomicCore() {
-}
-
-
-// Records a store or read-modify-write to the atomic variable at address |a|
-// in its simulated modification order.
-//
-//   thr        - thread performing the access;
-//   a          - address of the atomic variable (key into atomic_map_);
-//   v          - value written, stored in the new history entry;
-//   prev       - previous value, passed to AtomicFixHist() so that the
-//                history can be repaired after untracked accesses;
-//   is_acquire - the access has acquire semantics (only allowed for RMW);
-//   is_release - the access has release semantics;
-//   is_rmw     - the access is a read-modify-write rather than a plain store.
-void TsanAtomicCore::HandleWrite(TSanThread* thr,
-                                 uintptr_t a,
-                                 uint64_t v,
-                                 uint64_t prev,
-                                 bool const is_acquire,
-                                 bool const is_release,
-                                 bool const is_rmw) {
-  PrintfIf(debug_atomic, "HIST(%p): store acquire=%u, release=%u, rmw=%u\n",
-           (void*)a, is_acquire, is_release, is_rmw);
-  Atomic* atomic = &atomic_map_[a];
-  // Fix modification history if there were untracked accesses.
-  AtomicFixHist(atomic, prev);
-  // hprv is the latest existing entry, hist is the slot for the new one.
-  // NOTE(review): indexing with (hist_pos - 1) relies on hist_pos >= 1 at this
-  // point -- presumably guaranteed by AtomicFixHist; confirm.
-  AtomicHistoryEntry& hprv = atomic->hist
-      [(atomic->hist_pos - 1) % Atomic::kHistSize];
-  AtomicHistoryEntry& hist = atomic->hist
-      [atomic->hist_pos % Atomic::kHistSize];
-  // Fill in new entry in the modification history.
-  hist.val = v;
-  hist.tid = thr->tid();
-  hist.clk = thr->vts()->clk(thr->tid());
-  // The slot is reused, so drop the VTS left over from the old entry.
-  if (hist.vts != 0) {
-    VTS::Unref(hist.vts);
-    hist.vts = 0;
-  }
-  atomic->hist_pos += 1;
-
-  // Update VTS according to memory access type and memory ordering.
-  if (is_rmw) {
-    // An RMW carries over the VTS of the previous entry; a release RMW
-    // additionally joins in the VTS of the current thread.
-    if (is_release) {
-      if (hprv.vts != 0) {
-        hist.vts = VTS::Join(hprv.vts, thr->vts());
-      } else {
-        hist.vts = thr->vts()->Clone();
-      }
-    } else if (hprv.vts != 0) {
-      hist.vts = hprv.vts->Clone();
-    }
-    if (is_acquire && hprv.vts != 0) {
-      thr->NewSegmentForWait(hprv.vts);
-    }
-  } else {
-    // A plain store cannot acquire; it only publishes the thread's VTS when
-    // it is a release store.
-    DCHECK(is_acquire == false);
-    if (is_release) {
-      hist.vts = thr->vts()->Clone();
-    }
-  }
-
-  // Update the thread's VTS if it's a release memory access.
-  if (is_release) {
-    thr->NewSegmentForSignal();
-    if (debug_happens_before) {
-      // hist.vts is always set on the release paths above, so it is safe to
-      // dereference here. %p expects a pointer, hence the cast of |a|.
-      Printf("T%d: Signal: %p:\n    %s %s\n    %s\n",
-             thr->tid().raw(), (void*)a,
-             thr->vts()->ToString().c_str(),
-             Segment::ToString(thr->sid()).c_str(),
-             hist.vts->ToString().c_str());
-      if (G_flags->debug_level >= 1) {
-        thr->ReportStackTrace();
-      }
-    }
-  }
-}
-
-
-// Chooses the value an atomic load of the variable at address |a| returns
-// under the simulated memory model.
-//
-//   thr        - thread performing the load;
-//   a          - address of the atomic variable (key into atomic_map_);
-//   v          - value actually observed in memory, passed to AtomicFixHist()
-//                so that the history can be repaired after untracked accesses;
-//   is_acquire - the load has acquire semantics.
-//
-// Returns the value of the history entry that was selected. The selection may
-// be random (thr->random()) when several entries are admissible.
-uint64_t TsanAtomicCore::HandleRead(TSanThread* thr,
-                                    uintptr_t a,
-                                    uint64_t v,
-                                    bool is_acquire) {
-  PrintfIf(debug_atomic, "HIST(%p): {\n", (void*)a);
-
-  Atomic* atomic = &atomic_map_[a];
-  // Fix modification history if there were untracked accesses.
-  AtomicFixHist(atomic, v);
-  AtomicHistoryEntry* hist0 = 0;
-  int32_t seen_seq = 0;
-  // Newest history position this thread has already observed.
-  int32_t const seen_seq0 = atomic->last_seen.clock(thr->tid());
-  // Scan modification order of the variable from the latest entry
-  // back in time. For each side-effect (write) we determine as to
-  // whether we have to yield the value or we can go back in time further.
-  for (int32_t i = 0; i != Atomic::kHistSize; i += 1) {
-    int32_t const idx = (atomic->hist_pos - i - 1);
-    CHECK(idx >= 0);
-    AtomicHistoryEntry& hist = atomic->hist[idx % Atomic::kHistSize];
-    PrintfIf(debug_atomic, "HIST(%p):   #%u (tid=%u, clk=%u,"
-           " val=%llu) vts=%u\n",
-           (void*)a, (unsigned)i, (unsigned)hist.tid.raw(),
-           (unsigned)hist.clk, (unsigned long long)hist.val,
-           (unsigned)thr->vts()->clk(hist.tid));
-    if (hist.tid.raw() == TID::kInvalidTID) {
-      // We hit an uninitialized entry, that is, it's an access to an
-      // uninitialized variable (potentially due to "race").
-      // Unfortunately, it should not happen as of now.
-      // TODO(dvyukov): how can we detect and report uninitialized atomic
-      // reads?
-      hist0 = 0;
-      break;
-    } else if (i == Atomic::kHistSize - 1) {
-      // It's the last entry so we have to return it
-      // because we have to return something.
-      PrintfIf(debug_atomic, "HIST(%p):   replaced: last\n", (void*)a);
-      hist0 = &hist;
-      break;
-    } else if (seen_seq0 >= idx) {
-      // The thread had already seen the entry so we have to return
-      // at least it.
-      PrintfIf(debug_atomic, "HIST(%p):   replaced: stability\n", (void*)a);
-      hist0 = &hist;
-      break;
-    } else if (thr->vts()->clk(hist.tid) >= hist.clk) {
-      // The write happened-before the read, so we have to return it.
-      PrintfIf(debug_atomic, "HIST(%p):   replaced: ordering\n", (void*)a);
-      hist0 = &hist;
-      break;
-    } else if (thr->random() % 2) {
-      // We are not obliged to return the entry but we can (and decided to do).
-      PrintfIf(debug_atomic, "HIST(%p):   replaced: coherence\n", (void*)a);
-      seen_seq = idx;
-      hist0 = &hist;
-      break;
-    } else {
-      // Move on to the next (older) entry.
-      PrintfIf(debug_atomic, "HIST(%p):   can be replaced but not\n", (void*)a);
-    }
-  }
-
-  if (hist0 != 0) {
-    v = hist0->val;
-    // Acquire memory visibility if needed.
-    if (is_acquire) {
-      if (hist0->vts != 0) {
-        thr->NewSegmentForWait(hist0->vts);
-      }
-
-      if (debug_happens_before) {
-        // %p expects a pointer argument, hence the cast of |a|.
-        Printf("T%d: Wait: %p:\n    %s %s\n",
-               thr->tid().raw(), (void*)a,
-               thr->vts()->ToString().c_str(),
-               Segment::ToString(thr->sid()).c_str());
-        if (G_flags->debug_level >= 1) {
-          thr->ReportStackTrace();
-        }
-      }
-    }
-    // seen_seq is only set on the "coherence" path above.
-    // NOTE(review): a coherence pick at idx == 0 is indistinguishable from
-    // "not set" here and is therefore not recorded -- confirm that is intended.
-    if (seen_seq != 0) {
-      // Mark the entry as seen so we won't return any older entry later.
-      atomic->last_seen.update(thr->tid(), seen_seq);
-    }
-  } else {
-    // A string literal never compares equal to 0, so this CHECK always fails:
-    // reaching this branch is treated as a fatal internal error for now.
-    CHECK("should never happen as of now" == 0);
-    PrintfIf(debug_atomic, "HIST(%p): UNITIALIZED LOAD\n", (void*)a);
-    v = thr->random();
-  }
-  PrintfIf(debug_atomic, "HIST(%p): } -> %llu\n",
-      (void*)a, (unsigned long long)v);
-  return v;
-}
-
-
-void TsanAtomicCore::ClearMemoryState(uintptr_t a, uintptr_t b) {
-  DCHECK(a <= b);
-  DCHECK(G_flags->enable_atomic || atomic_map_.empty());
-  AtomicMap::iterator begin (atomic_map_.lower_bound(a));
-  AtomicMap::iterator pos (begin);
-  for (; pos != atomic_map_.end() && pos->first <= b; ++pos) {
-    pos->second.reset();
-  }
-  atomic_map_.erase(begin, pos);
-}
-
-
-void TsanAtomicCore::AtomicFixHist(Atomic* atomic, uint64_t prev) {
-  AtomicHistoryEntry& hprv = atomic->hist
-      [(atomic->hist_pos - 1) % Atomic::kHistSize];
-  // In case we had missed an atomic access (that is, an access from 
-  // non-instrumented code), reset whole history and initialize it
-  // with a single entry that happened "before world creation".
-  if (prev != hprv.val) {
-    PrintfIf(debug_atomic, "HIST RESET\n");
-    atomic->reset();
-    AtomicHistoryEntry& hist = atomic->hist
-        [atomic->hist_pos % Atomic::kHistSize];
-    hist.val = prev;
-    hist.tid = TID(0);
-    hist.clk = 0;
-    atomic->hist_pos += 1;
-  }
-}
-
-
-TsanAtomicCore::Atomic::Atomic() {
-  reset(true);
-}
-
-
-void TsanAtomicCore::Atomic::reset(bool init) {
-  hist_pos = sizeof(hist)/sizeof(hist[0]) + 1;
-  for (size_t i = 0; i != sizeof(hist)/sizeof(hist[0]); i += 1) {
-    hist[i].val = 0xBCEBC041;
-    hist[i].tid = TID(TID::kInvalidTID);
-    hist[i].clk = -1;
-    if (init == false && hist[i].vts != 0)
-      VTS::Unref(hist[i].vts);
-    hist[i].vts = 0;
-  }
-  last_seen.reset();
-}
-
-
-// -------- TODO -------------------------- {{{1
-// - Support configurable aliases for function names (is it doable in valgrind)?
-// - Correctly support atomic operations (not just ignore).
-// - Handle INC as just one write
-//   - same for memset, etc
-// - Implement correct handling of memory accesses with different sizes.
-// - Do not create HB arcs between RdUnlock and RdLock
-// - Compress cache lines
-// - Optimize the case where a threads signals twice in a row on the same
-//   address.
-// - Fix --ignore-in-dtor if --demangle=no.
-// - Use cpplint (http://code.google.com/p/google-styleguide)
-// - Get rid of annoying casts in printfs.
-// - Compress stack traces (64-bit only. may save up to 36 bytes per segment).
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/thread_sanitizer.h b/tsan/thread_sanitizer.h
deleted file mode 100644
index cbd3216..0000000
--- a/tsan/thread_sanitizer.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-//--------- Head ------------------- {{{1
-#ifndef THREAD_SANITIZER_H_
-#define THREAD_SANITIZER_H_
-
-#include "ts_util.h"
-#include "ts_atomic.h"
-
-//--------- Utils ------------------- {{{1
-
-void Report(const char *format, ...);
-void PcToStrings(uintptr_t pc, bool demangle,
-                string *img_name, string *rtn_name,
-                string *file_name, int *line_no);
-string PcToRtnNameAndFilePos(uintptr_t pc);
-string PcToRtnName(uintptr_t pc, bool demangle);
-string Demangle(const char *str);
-
-
-//--------- FLAGS ---------------------------------- {{{1
-struct FLAGS {
-  string           input_type; // for ts_offline.
-                               // Possible values: str, bin, decode.
-  bool             ignore_stack;
-  intptr_t         verbosity;
-  intptr_t         show_stats;  // 0 -- no stats; 1 -- some stats; 2 more stats.
-  bool             trace_profile;
-  bool             show_expected_races;
-  uintptr_t        trace_addr;
-  uintptr_t        segment_set_recycle_queue_size;
-  uintptr_t        recent_segments_cache_size;
-  vector<string>   file_prefix_to_cut;
-  vector<string>   ignore;
-  vector<string>   whitelist;
-  bool             ignore_unknown_pcs;  // Ignore PCs with no debug info.
-  vector<string>   cut_stack_below;
-  string           summary_file;
-  string           log_file;
-  bool             offline;
-  intptr_t         max_n_threads;
-  bool             compress_cache_lines;
-  bool             unlock_on_mutex_destroy;
-
-  intptr_t         sample_events;
-  intptr_t         sample_events_depth;
-
-  intptr_t         num_callers;
-
-  intptr_t    keep_history;
-  bool        pure_happens_before;
-  bool        free_is_write;
-  bool        exit_after_main;
-  bool        demangle;
-  bool        announce_threads;
-  bool        full_output;
-  bool        show_states;
-  bool        show_proc_self_status;
-  bool        show_valgrind_context;  // debug-only
-  bool        suggest_happens_before_arcs;
-  bool        show_pc;
-  bool        full_stack_frames;
-  bool        color;  // Colorify terminal output.
-  bool        html;  // Output in html format.
-  bool        show_pid;
-
-  intptr_t  debug_level;
-  bool        save_ignore_context;  // print stack if ignore_end was forgotten.
-  vector<string> debug_phase;
-  intptr_t  trace_level;
-
-  intptr_t     dry_run;
-  intptr_t     max_sid;
-  intptr_t     max_sid_before_flush;
-  intptr_t     max_mem_in_mb;
-  intptr_t     num_callers_in_history;
-  intptr_t     flush_period;
-
-  intptr_t     literace_sampling;
-  bool         start_with_global_ignore_on;
-
-  intptr_t     locking_scheme;  // Used for internal experiments with locking.
-
-  bool         report_races;
-  bool         thread_coverage;
-  bool         atomicity;
-  bool         call_coverage;
-  string       dump_events;  // The name of log file. Debug mode only.
-  bool         symbolize;
-  bool         attach_mode;
-
-  string       tsan_program_name;
-  string       tsan_url;
-
-  vector<string> suppressions;
-  bool           generate_suppressions;
-
-  intptr_t     error_exitcode;
-  bool         trace_children;
-
-  vector<string> race_verifier;
-  vector<string> race_verifier_extra;
-  intptr_t       race_verifier_sleep_ms;
-
-  bool nacl_untrusted;
-
-  bool threaded_analysis;
-
-  bool sched_shake;
-  bool api_ambush;
-
-  bool enable_atomic;
-};
-
-extern FLAGS *G_flags;
-
-extern bool g_race_verifier_active;
-
-extern bool debug_expected_races;
-extern bool debug_malloc;
-extern bool debug_free;
-extern bool debug_thread;
-extern bool debug_rtn;
-extern bool debug_wrap;
-extern bool debug_ins;
-extern bool debug_shadow_stack;
-extern bool debug_race_verifier;
-
-// -------- CallStack ------------- {{{1
-const size_t kMaxCallStackSize = 1 << 12;
-
-struct CallStackPod {
-  uintptr_t *end_;
-  uintptr_t pcs_[kMaxCallStackSize];
-};
-
-struct CallStack: public CallStackPod {
-
-  CallStack() { Clear(); }
-
-  size_t size() { return (size_t)(end_ - pcs_); }
-  uintptr_t *pcs() { return pcs_; }
-
-  bool empty() { return end_ == pcs_; }
-
-  uintptr_t &back() {
-    DCHECK(!empty());
-    return *(end_ - 1);
-  }
-
-  void pop_back() {
-    DCHECK(!empty());
-    end_--;
-  }
-
-  void push_back(uintptr_t pc) {
-    DCHECK(size() < kMaxCallStackSize);
-    *end_ = pc;
-    end_++;
-  }
-
-  void Clear() {
-    end_ = pcs_;
-  }
-
-  uintptr_t &operator[] (size_t i) {
-    DCHECK(i < size());
-    return pcs_[i];
-  }
-
-};
-
-//--------- TS Exports ----------------- {{{1
-#include "ts_events.h"
-#include "ts_trace_info.h"
-
-struct TSanThread;
-void ThreadSanitizerInit();
-void ThreadSanitizerFini();
-// TODO(glider): this is a temporary solution to avoid deadlocks after fork().
-#ifdef TS_LLVM
-void ThreadSanitizerLockAcquire();
-void ThreadSanitizerLockRelease();
-#endif
-void ThreadSanitizerHandleOneEvent(Event *event);
-TSanThread *ThreadSanitizerGetThreadByTid(int32_t tid);
-void ThreadSanitizerHandleTrace(int32_t tid, TraceInfo *trace_info,
-                                       uintptr_t *tleb);
-void ThreadSanitizerHandleTrace(TSanThread *thr, TraceInfo *trace_info,
-                                       uintptr_t *tleb);
-void ThreadSanitizerHandleOneMemoryAccess(TSanThread *thr, MopInfo mop,
-                                                 uintptr_t addr);
-void ThreadSanitizerParseFlags(vector<string>* args);
-bool ThreadSanitizerWantToInstrumentSblock(uintptr_t pc);
-bool ThreadSanitizerWantToCreateSegmentsOnSblockEntry(uintptr_t pc);
-bool ThreadSanitizerIgnoreAccessesBelowFunction(uintptr_t pc);
-
-typedef int (*ThreadSanitizerUnwindCallback)(uintptr_t* stack, int size, uintptr_t pc);
-void ThreadSanitizerSetUnwindCallback(ThreadSanitizerUnwindCallback cb);
-
-/** Atomic operation handler.
- *  @param tid ID of a thread that issues the operation.
- *  @param pc Program counter that should be associated with the operation.
- *  @param op Type of the operation (load, store, etc).
- *  @param mo Memory ordering associated with the operation
- *      (relaxed, acquire, release, etc). NB there are some restrictions on
- *      what memory orderings can be used with what types of operations.
- *      E.g. a store can't have an acquire semantics
- *      (see C++0x standard draft for details).
- *  @param fail_mo Memory ordering the operation has if it fails,
- *      applicable only to compare_exchange oprations.
- *  @param size Size of the memory access in bytes (1, 2, 4 or 8).
- *  @param a Address of the memory access.
- *  @param v Operand for the operation (e.g. a value to store).
- *  @param cmp Comparand for compare_exchange oprations.
- *  @return Result of the operation (e.g. loaded value).
- */
-uint64_t ThreadSanitizerHandleAtomicOp(int32_t tid,
-                                       uintptr_t pc,
-                                       tsan_atomic_op op,
-                                       tsan_memory_order mo,
-                                       tsan_memory_order fail_mo,
-                                       size_t size,
-                                       void volatile* a,
-                                       uint64_t v,
-                                       uint64_t cmp);
-
-enum IGNORE_BELOW_RTN {
-  IGNORE_BELOW_RTN_UNKNOWN,
-  IGNORE_BELOW_RTN_NO,
-  IGNORE_BELOW_RTN_YES
-};
-
-void ThreadSanitizerHandleRtnCall(int32_t tid, uintptr_t call_pc,
-                                         uintptr_t target_pc,
-                                         IGNORE_BELOW_RTN ignore_below);
-
-void ThreadSanitizerHandleRtnExit(int32_t tid);
-
-void ThreadSanitizerPrintUsage();
-extern "C" const char *ThreadSanitizerQuery(const char *query);
-bool PhaseDebugIsOn(const char *phase_name);
-
-extern bool g_has_entered_main;
-extern bool g_has_exited_main;
-
-// -------- Stats ------------------- {{{1
-#include "ts_stats.h"
-extern Stats *G_stats;
-
-// -------- Expected Race ---------------------- {{{1
-// Information about expected races.
-struct ExpectedRace {
-  uintptr_t   ptr;
-  uintptr_t   size;
-  bool        is_verifiable;
-  bool        is_nacl_untrusted;
-  int         count;
-  const char *description;
-  uintptr_t   pc;
-};
-
-ExpectedRace* ThreadSanitizerFindExpectedRace(uintptr_t addr);
-
-// Tell ThreadSanitizer about the location of NaCl untrusted region.
-void ThreadSanitizerNaclUntrustedRegion(uintptr_t mem_start, uintptr_t mem_end);
-
-// Returns true if accesses and locks at the given address should be ignored
-// according to the current NaCl flags (--nacl-untrusted). Always false if not a
-// NaCl program.
-bool ThreadSanitizerIgnoreForNacl(uintptr_t addr);
-
-// end. {{{1
-#endif  //  THREAD_SANITIZER_H_
-
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/thread_sanitizer_test.cc b/tsan/thread_sanitizer_test.cc
deleted file mode 100644
index c761078..0000000
--- a/tsan/thread_sanitizer_test.cc
+++ /dev/null
@@ -1,399 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-
-// This file contains tests for various parts of ThreadSanitizer.
-
-#include <gtest/gtest.h>
-
-#include "ts_heap_info.h"
-#include "ts_simple_cache.h"
-#include "dense_multimap.h"
-
-// Testing the HeapMap.
-struct TestHeapInfo {
-  uintptr_t ptr;
-  uintptr_t size;
-  int       val;
-  TestHeapInfo() : ptr(0), size(0), val(0) { }
-  TestHeapInfo(uintptr_t p, uintptr_t s, uintptr_t v) :
-    ptr(p), size(s), val(v) { }
-};
-
-TEST(ThreadSanitizer, HeapInfoTest) {
-  HeapMap<TestHeapInfo> map;
-  TestHeapInfo *info;
-  EXPECT_EQ(0U, map.size());
-  EXPECT_EQ(NULL, map.GetInfo(12345));
-
-  // Insert range [1000, 1000+100) with value 1.
-  map.InsertInfo(1000, TestHeapInfo(1000, 100, 1));
-  EXPECT_EQ(1U, map.size());
-  info = map.GetInfo(1000);
-  EXPECT_TRUE(info);
-  EXPECT_EQ(1000U, info->ptr);
-  EXPECT_EQ(100U, info->size);
-  EXPECT_EQ(1, info->val);
-
-  EXPECT_TRUE(map.GetInfo(1000));
-  EXPECT_EQ(1, info->val);
-  EXPECT_TRUE(map.GetInfo(1050));
-  EXPECT_EQ(1, info->val);
-  EXPECT_TRUE(map.GetInfo(1099));
-  EXPECT_EQ(1, info->val);
-  EXPECT_FALSE(map.GetInfo(1100));
-  EXPECT_FALSE(map.GetInfo(2000));
-
-  EXPECT_EQ(NULL, map.GetInfo(2000));
-  EXPECT_EQ(NULL, map.GetInfo(3000));
-
-  // Insert range [2000, 2000+200) with value 2.
-  map.InsertInfo(2000, TestHeapInfo(2000, 200, 2));
-  EXPECT_EQ(2U, map.size());
-
-  info = map.GetInfo(1000);
-  EXPECT_TRUE(info);
-  EXPECT_EQ(1, info->val);
-
-  info = map.GetInfo(2000);
-  EXPECT_TRUE(info);
-  EXPECT_EQ(2, info->val);
-
-  info = map.GetInfo(1000);
-  EXPECT_TRUE(info);
-  EXPECT_EQ(1, info->val);
-  EXPECT_TRUE((info = map.GetInfo(1050)));
-  EXPECT_EQ(1, info->val);
-  EXPECT_TRUE((info = map.GetInfo(1099)));
-  EXPECT_EQ(1, info->val);
-  EXPECT_FALSE(map.GetInfo(1100));
-
-  EXPECT_TRUE((info = map.GetInfo(2000)));
-  EXPECT_EQ(2, info->val);
-  EXPECT_TRUE((info = map.GetInfo(2199)));
-  EXPECT_EQ(2, info->val);
-
-  EXPECT_FALSE(map.GetInfo(2200));
-  EXPECT_FALSE(map.GetInfo(3000));
-
-  // Insert range [3000, 3000+300) with value 3.
-  map.InsertInfo(3000, TestHeapInfo(3000, 300, 3));
-  EXPECT_EQ(3U, map.size());
-
-  EXPECT_TRUE((info = map.GetInfo(1000)));
-  EXPECT_EQ(1, info->val);
-
-  EXPECT_TRUE((info = map.GetInfo(2000)));
-  EXPECT_EQ(2, info->val);
-
-  EXPECT_TRUE((info = map.GetInfo(3000)));
-  EXPECT_EQ(3, info->val);
-
-  EXPECT_TRUE((info = map.GetInfo(1050)));
-  EXPECT_EQ(1, info->val);
-
-  EXPECT_TRUE((info = map.GetInfo(2100)));
-  EXPECT_EQ(2, info->val);
-
-  EXPECT_TRUE((info = map.GetInfo(3200)));
-  EXPECT_EQ(3, info->val);
-
-  // Remove range [2000,2000+200)
-  map.EraseInfo(2000);
-  EXPECT_EQ(2U, map.size());
-
-  EXPECT_TRUE((info = map.GetInfo(1050)));
-  EXPECT_EQ(1, info->val);
-
-  EXPECT_FALSE(map.GetInfo(2100));
-
-  EXPECT_TRUE((info = map.GetInfo(3200)));
-  EXPECT_EQ(3, info->val);
-
-}
-
-TEST(ThreadSanitizer, PtrToBoolCacheTest) {
-  PtrToBoolCache<256> c;
-  bool val = false;
-  EXPECT_FALSE(c.Lookup(123, &val));
-
-  c.Insert(0, false);
-  c.Insert(1, true);
-  c.Insert(2, false);
-  c.Insert(3, true);
-
-  EXPECT_TRUE(c.Lookup(0, &val));
-  EXPECT_EQ(false, val);
-  EXPECT_TRUE(c.Lookup(1, &val));
-  EXPECT_EQ(true, val);
-  EXPECT_TRUE(c.Lookup(2, &val));
-  EXPECT_EQ(false, val);
-  EXPECT_TRUE(c.Lookup(3, &val));
-  EXPECT_EQ(true, val);
-
-  EXPECT_FALSE(c.Lookup(256, &val));
-  EXPECT_FALSE(c.Lookup(257, &val));
-  EXPECT_FALSE(c.Lookup(258, &val));
-  EXPECT_FALSE(c.Lookup(259, &val));
-
-  c.Insert(0, true);
-  c.Insert(1, false);
-
-  EXPECT_TRUE(c.Lookup(0, &val));
-  EXPECT_EQ(true, val);
-  EXPECT_TRUE(c.Lookup(1, &val));
-  EXPECT_EQ(false, val);
-  EXPECT_TRUE(c.Lookup(2, &val));
-  EXPECT_EQ(false, val);
-  EXPECT_TRUE(c.Lookup(3, &val));
-  EXPECT_EQ(true, val);
-
-  c.Insert(256, false);
-  c.Insert(257, false);
-  EXPECT_FALSE(c.Lookup(0, &val));
-  EXPECT_FALSE(c.Lookup(1, &val));
-  EXPECT_TRUE(c.Lookup(2, &val));
-  EXPECT_EQ(false, val);
-  EXPECT_TRUE(c.Lookup(3, &val));
-  EXPECT_EQ(true, val);
-  EXPECT_TRUE(c.Lookup(256, &val));
-  EXPECT_EQ(false, val);
-  EXPECT_TRUE(c.Lookup(257, &val));
-  EXPECT_EQ(false, val);
-}
-
-TEST(ThreadSanitizer, IntPairToBoolCacheTest) {
-  IntPairToBoolCache<257> c;
-  bool val = false;
-  map<pair<int,int>, bool> m;
-
-  for (int i = 0; i < 1000000; i++) {
-    int a = (rand() % 1024) + 1;
-    int b = (rand() % 1024) + 1;
-
-    if (c.Lookup(a, b, &val)) {
-      EXPECT_EQ(1U, m.count(make_pair(a,b)));
-      EXPECT_EQ(val, m[make_pair(a,b)]);
-    }
-
-    val = (rand() % 2) == 1;
-    c.Insert(a, b, val);
-    m[make_pair(a,b)] = val;
-  }
-}
-
-TEST(ThreadSanitizer, DenseMultimapTest) {
-  typedef DenseMultimap<int, 3> Map;
-
-  Map m1(1, 2);
-  EXPECT_EQ(m1[0], 1);
-  EXPECT_EQ(m1[1], 2);
-  EXPECT_EQ(m1.size(), 2U);
-
-  Map m2(3, 2);
-  EXPECT_EQ(m2[0], 2);
-  EXPECT_EQ(m2[1], 3);
-  EXPECT_EQ(m2.size(), 2U);
-
-  Map m3(m1, 0);
-  EXPECT_EQ(m3.size(), 3U);
-  EXPECT_EQ(m3[0], 0);
-  EXPECT_EQ(m3[1], 1);
-  EXPECT_EQ(m3[2], 2);
-
-  Map m4(m3, 1);
-  EXPECT_EQ(m4.size(), 4U);
-  EXPECT_EQ(m4[0], 0);
-  EXPECT_EQ(m4[1], 1);
-  EXPECT_EQ(m4[2], 1);
-  EXPECT_EQ(m4[3], 2);
-
-  Map m5(m4, 5);
-  Map m6(m5, -2);
-  Map m7(m6, 2);
-  EXPECT_EQ(m7.size(), 7U);
-
-  EXPECT_TRUE(m7.has(-2));
-  EXPECT_TRUE(m7.has(0));
-  EXPECT_TRUE(m7.has(1));
-  EXPECT_TRUE(m7.has(2));
-  EXPECT_TRUE(m7.has(5));
-  EXPECT_FALSE(m7.has(3));
-  EXPECT_FALSE(m7.has(-1));
-  EXPECT_FALSE(m7.has(4));
-
-  Map m8(m7, Map::REMOVE, 1);
-  EXPECT_EQ(m8.size(), 6U);
-  EXPECT_TRUE(m8.has(1));
-
-  Map m9(m8, Map::REMOVE, 1);
-  EXPECT_EQ(m9.size(), 5U);
-  EXPECT_FALSE(m9.has(1));
-}
-
-TEST(ThreadSanitizer, NormalizeFunctionNameNotChangingTest) {
-  const char *samples[] = {
-    // These functions should not be changed by NormalizeFunctionName():
-    // C functions
-    "main",
-    "pthread_mutex_unlock",
-    "pthread_create@@GLIBC_2.2.5",
-    "pthread_create@*"
-
-    // Valgrind can give us this, we should keep it.
-    "(below main)",
-
-    // C++ operators
-    "operator new[]",
-    "operator delete[]",
-
-    // PIN on Windows handles non-templated C++ code well
-    "my_namespace::ClassName::Method",
-    "PositiveTests_HarmfulRaceInDtor::A::~A",
-    "PositiveTests_HarmfulRaceInDtor::B::`scalar deleting destructor'",
-
-    // Objective-C on Mac
-    "+[NSNavFBENode _virtualNodeOfType:]",
-    "-[NSObject(NSObject) autorelease]",
-    "-[NSObject(NSKeyValueCoding) setValue:forKeyPath:]",
-    "-[NSCell(NSPrivate_CellMouseTracking) _setMouseTrackingInRect:ofView:]",
-    // TODO(glider): other interesting cases from Objective-C?
-    // Should we "s/:.*\]/\]/" ?
-  };
-
-  for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i += 2) {
-    EXPECT_STREQ(samples[i], NormalizeFunctionName(samples[i]).c_str());
-  }
-}
-
-TEST(ThreadSanitizer, NormalizeFunctionNameChangingTest) {
-  const char *samples[] = {
-    // These functions should be changed by removing <.*> and (.*) while
-    // correctly handling the "function returns a [template] function pointer"
-    // case.
-    // This is a list of (full demangled name, short name) pairs.
-    "SuppressionTests::Foo(int*)", "SuppressionTests::Foo",
-    "logging::LogMessage::Init(char const*, int)", "logging::LogMessage::Init",
-    "void DispatchToMethod<net::SpdySession, void (net::SpdySession::*)(int), int>(net::SpdySession*, void (net::SpdySession::*)(int), Tuple1<int> const&)",
-        "DispatchToMethod",
-    "MessageLoop::DeferOrRunPendingTask(MessageLoop::PendingTask const&)",
-        "MessageLoop::DeferOrRunPendingTask",
-    "spdy::SpdyFramer::ProcessInput(char const*, unsigned long)",
-        "spdy::SpdyFramer::ProcessInput",
-    "base::RefCountedThreadSafe<history::HistoryBackend, base::DefaultRefCountedThreadSafeTraits<history::HistoryBackend> >::Release() const",
-        "base::RefCountedThreadSafe::Release",
-    "net::X509Certificate::Verify(std::string const&, int, net::CertVerifyResult*) const",
-        "net::X509Certificate::Verify",
-
-    "(anonymous namespace)::ExtentToStringSet(ExtensionExtent const&, std::set<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)",
-        "::ExtentToStringSet",
-
-    "scoped_ptr<(anonymous namespace)::ImportEndedObserver>::operator->() const",
-        "scoped_ptr::operator->",
-
-    "int (anonymous namespace)::ValueCompare<long>(long, long)",
-        "::ValueCompare",
-
-    "std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > const& std::__median<std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> >, (anonymous namespace)::CompareQuality>(std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > const&, std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > const&, std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > const&, (anonymous namespace)::CompareQuality)",
-        "std::__median",
-
-    "std::basic_ostream<char, std::char_traits<char> >& std::operator<< <char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, std::_Setprecision)",
-        "std::operator<<",
-
-    "net::(anonymous namespace)::CookieSignature::operator<(net::(anonymous namespace)::CookieSignature const&) const",
-        "net::::CookieSignature::operator<",
-
-    "v8::Handle<v8::Value> (*v8::ToCData<v8::Handle<v8::Value> (*)(v8::Arguments const&)>(v8::internal::Object*))(v8::Arguments const&)",
-        "v8::ToCData",
-
-    "v8::internal::Handle<v8::internal::Object> v8::FromCData<v8::Handle<v8::Value> (*)(v8::Local<v8::String>, v8::AccessorInfo const&)>(v8::Handle<v8::Value> (*)(v8::Local<v8::String>, v8::AccessorInfo const&))",
-        "v8::FromCData",
-
-    "WebCore::operator<<(WebCore::TextStream&, WebCore::LineCap)",
-        "WebCore::operator<<",
-
-    "__gnu_cxx::__normal_iterator<void (**)(), std::vector<void (*)(), std::allocator<void (*)()> > >::base() const",
-        "__gnu_cxx::__normal_iterator::base",
-
-    "__gnu_cxx::__normal_iterator<device_orientation::DataFetcher* (* const*)(), std::vector<device_orientation::DataFetcher* (*)(), std::allocator<device_orientation::DataFetcher* (*)()> > >::operator++()",
-        "__gnu_cxx::__normal_iterator::operator++",
-
-    "__gnu_cxx::__normal_iterator<std::pair<int, std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > >*, std::vector<std::pair<int, std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > >, std::allocator<std::pair<int, std::basic_string<unsigned short, base::string16_char_traits, std::allocator<unsigned short> > > > > >::operator->() const",
-        "__gnu_cxx::__normal_iterator::operator->",
-
-    "std::less<CancelableRequestConsumerTSimple<PageUsageData*>::PendingRequest>::operator()(CancelableRequestConsumerTSimple<PageUsageData*>::PendingRequest const&, CancelableRequestConsumerTSimple<PageUsageData*>::PendingRequest const&) const",
-        "std::less::operator()",
-
-    "SuppressionTests::MyClass<int>::Fooz(int*) const",
-        "SuppressionTests::MyClass::Fooz",
-
-    // Templates and functions returning function pointers
-    "void (*SuppressionTests::TemplateFunction1<void (*)(int*)>(void (*)(int*)))(int*)",
-        "SuppressionTests::TemplateFunction1",  // Valgrind, Linux
-    "void SuppressionTests::TemplateFunction2<void>()",
-        "SuppressionTests::TemplateFunction2",  // OMG, return type in template
-    "void (**&SuppressionTests::TemplateFunction3<void (*)(int)>())",
-        "SuppressionTests::TemplateFunction3",  // Valgrind, Linux
-
-    "SuppressionTests::TemplateFunction1<void (__cdecl*)(int *)>",
-        "SuppressionTests::TemplateFunction1",  // PIN, Windows
-    "SuppressionTests::MyClass<int>::Fooz",
-        "SuppressionTests::MyClass::Fooz",
-    "std::operator<<char,std::char_traits<char>,std::allocator<char> >",
-        "(malformed frame)",  // Should be "std::operator<"? Really?
-    "std::_Ranit<stdext::_Hash<stdext::_Hmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,int,stdext::hash_compare<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,int> >,0> >::_List_position,int,stdext::_Hash<stdext::_Hmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,int,stdext::hash_compare<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,int> >,0> >::_List_position const *,stdext::_Hash<stdext::_Hmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,int,stdext::hash_compare<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,int> >,0> >::_List_position const &>::_Ranit<stdext::_Hash<stdext::_Hmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,int,stdext::hash_compare<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,int> >,0> >::_List_position,int,stdext::_Hash<stdext::_Hmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,int,stdext::hash_compare<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > 
>,std::allocator<std::pair<std::basic",
-        "(malformed frame)",
-    "std::_Tree_val<std::_Tmap_traits<net::`anonymous namespace'::CookieSignature,std::set<std::_Tree<std::_Tmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,net::CookieMonster::CanonicalCookie *,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,net::CookieMonster::CanonicalCookie *> >,1> >::iterator,net::`anonymous namespace'::OrderByCreationTimeDesc,std::allocator<std::_Tree<std::_Tmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,net::CookieMonster::CanonicalCookie *,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,net::CookieMonster::CanonicalCookie *> >,1> >::iterator> >,std::less<net::`anonymous namespace'::CookieSignature>,std::allocator<std::pair<net::`anonymous namespace'::CookieSignature const ,std::set<std::_Tree<std::_Tmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,net::CookieMonster::CanonicalCookie *,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,net::CookieMonster::CanonicalCookie *> >,1> >::iterator,net::`anonymous namespace'::OrderByCreationTimeDesc,std::allocator<std::_Tree<std::_Tmap_traits<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,net::CookieMonster::CanonicalCookie *,std::less<std::basic_string<char,std::char_traits<char>,std::allocator<char> > >,std::allocator<std::pair<std::basic_string<char,std::char_traits<char>,std::allocator<char> > const ,net::CookieMonster::CanonicalCookie *> >,1> >::iterator> > > >,0> >::~_Tree_val<std::_Tmap_traits<net::`anonymous 
namespace'::CookieSignature,std::set<std::_Tree<std::_Tmap_traits<std::basic_string<",
-        "(malformed frame)",
-
-    "__gnu_cxx::new_allocator<char>::allocate(unsigned long, void const*)",
-        "__gnu_cxx::new_allocator::allocate",
-
-    "PositiveTests_HarmfulRaceInDtor::A::~A()",  // Valgrind, Linux
-        "PositiveTests_HarmfulRaceInDtor::A::~A",
-
-    "X::foo(int*) const()",   // GCC, Linux
-        "X::foo",
-    "X::foo(int*) const volatile",
-        "X::foo",
-
-    "base::(anonymous namespace)::ThreadFunc(void*)",
-      "base::::ThreadFunc",  // TODO(timurrrr): keep "anonymous namespace"?
-
-    "operator new[](unsigned long)", "operator new[]",  // Valgrind, Linux
-  };
-
-  for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i += 2) {
-    EXPECT_STREQ(samples[i+1], NormalizeFunctionName(samples[i]).c_str());
-  }
-}
-
-int main(int argc, char **argv) {
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/tsan/ts_atomic.cc b/tsan/ts_atomic.cc
deleted file mode 100644
index 9358fcf..0000000
--- a/tsan/ts_atomic.cc
+++ /dev/null
@@ -1,432 +0,0 @@
-/* ThreadSanitizer
- * Copyright (c) 2011, Google Inc. All rights reserved.
- * Author: Dmitry Vyukov (dvyukov)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ts_util.h"
-#include "ts_atomic_int.h"
-
-
-char const* tsan_atomic_to_str(tsan_memory_order mo) {
-  switch (mo) {
-    case tsan_memory_order_invalid: return "invalid";
-    case tsan_memory_order_natomic: return "natomic";
-    case tsan_memory_order_relaxed: return "relaxed";
-    case tsan_memory_order_consume: return "consume";
-    case tsan_memory_order_acquire: return "acquire";
-    case tsan_memory_order_release: return "release";
-    case tsan_memory_order_acq_rel: return "acq_rel";
-    case tsan_memory_order_seq_cst: return "seq_cst";
-    default: return "-------";
-  }
-}
-
-
-char const* tsan_atomic_to_str(tsan_atomic_op op) {
-  switch (op) {
-    case tsan_atomic_op_invalid: return "invalid";
-    case tsan_atomic_op_fence: return "fence";
-    case tsan_atomic_op_load: return "load";
-    case tsan_atomic_op_store: return "store";
-    case tsan_atomic_op_exchange: return "exchange";
-    case tsan_atomic_op_fetch_add: return "fetch_add";
-    case tsan_atomic_op_fetch_sub: return "fetch_sub";
-    case tsan_atomic_op_fetch_and: return "fetch_and";
-    case tsan_atomic_op_fetch_xor: return "fetch_xor";
-    case tsan_atomic_op_fetch_or: return "fetch_or";
-    case tsan_atomic_op_compare_exchange_weak: return "compare_exchange_weak";
-    case tsan_atomic_op_compare_exchange_strong:
-      return "compare_exchange_strong";
-    default: return "---";
-  }
-}
-
-
-bool tsan_atomic_is_acquire(tsan_memory_order mo) {
-  return !!(mo & (tsan_memory_order_consume
-      | tsan_memory_order_acquire
-      | tsan_memory_order_acq_rel
-      | tsan_memory_order_seq_cst));
-}
-
-
-bool tsan_atomic_is_release(tsan_memory_order mo) {
-  return !!(mo & (tsan_memory_order_release
-      | tsan_memory_order_acq_rel
-      | tsan_memory_order_seq_cst));
-}
-
-
-bool tsan_atomic_is_rmw(tsan_atomic_op op) {
-  return !!(op & (tsan_atomic_op_exchange
-      | tsan_atomic_op_fetch_add
-      | tsan_atomic_op_fetch_sub
-      | tsan_atomic_op_fetch_and
-      | tsan_atomic_op_fetch_xor
-      | tsan_atomic_op_fetch_or
-      | tsan_atomic_op_compare_exchange_weak
-      | tsan_atomic_op_compare_exchange_strong));
-}
-
-
-void tsan_atomic_verify(tsan_atomic_op op,
-                        tsan_memory_order mo,
-                        tsan_memory_order fail_mo,
-                        size_t size,
-                        void volatile* a) {
-  CHECK(size == 1 || size == 2 || size == 4 || size == 8);
-  CHECK((((uintptr_t)a) % size) == 0);
-
-  if (op == tsan_atomic_op_load) {
-    CHECK(mo & (tsan_memory_order_natomic
-        | tsan_memory_order_relaxed
-        | tsan_memory_order_consume
-        | tsan_memory_order_acquire
-        | tsan_memory_order_seq_cst));
-  } else if (op == tsan_atomic_op_store) {
-    CHECK(mo & (tsan_memory_order_natomic
-        | tsan_memory_order_relaxed
-        | tsan_memory_order_release
-        | tsan_memory_order_seq_cst));
-  } else if (op == tsan_atomic_op_fence) {
-    CHECK(mo & (tsan_memory_order_consume
-        | tsan_memory_order_acquire
-        | tsan_memory_order_release
-        | tsan_memory_order_acq_rel
-        | tsan_memory_order_seq_cst));
-  } else if (op & (tsan_atomic_op_exchange
-        | tsan_atomic_op_fetch_add
-        | tsan_atomic_op_fetch_sub
-        | tsan_atomic_op_fetch_and
-        | tsan_atomic_op_fetch_xor
-        | tsan_atomic_op_fetch_or
-        | tsan_atomic_op_compare_exchange_weak
-        | tsan_atomic_op_compare_exchange_strong)) {
-    CHECK(mo & (tsan_memory_order_relaxed
-        | tsan_memory_order_consume
-        | tsan_memory_order_acquire
-        | tsan_memory_order_release
-        | tsan_memory_order_acq_rel
-        | tsan_memory_order_seq_cst));
-  } else {
-    CHECK("unknown tsan_atomic_op" == 0);
-  }
-}
-
-
-#if defined(__i386__)
-# define __x86__
-#elif defined(__x86_64__)
-# define __x86__
-#endif
-
-#if defined(__GNUC__) && defined(__x86_64__)
-uint64_t tsan_atomic_do_op(tsan_atomic_op op,
-                           tsan_memory_order mo,
-                           tsan_memory_order fail_mo,
-                           size_t size,
-                           void volatile* a,
-                           uint64_t v,
-                           uint64_t cmp,
-                           uint64_t* newv,
-                           uint64_t* prev) {
-  *newv = v;
-  if (op != tsan_atomic_op_fence) {
-    if (size == 1) {
-      *prev = *(uint8_t volatile*)a;
-    } else if (size == 2) {
-      *prev =  *(uint16_t volatile*)a;
-    } else if (size == 4) {
-      *prev =  *(uint32_t volatile*)a;
-    } else if (size == 8) {
-      *prev =  *(uint64_t volatile*)a;
-    }
-  }
-
-  if (op == tsan_atomic_op_load) {
-    return *prev;
-
-  } else if (op == tsan_atomic_op_store) {
-    if (mo == tsan_memory_order_seq_cst) {
-      if (size == 1) {
-        uint8_t vv = (uint8_t)v;
-        __asm__ __volatile__ ("xchgb %1, %0"
-            : "=r" (vv) : "m" (*(uint8_t volatile*)a), "0" (vv));
-        *prev = vv;
-      } else if (size == 2) {
-        uint16_t vv = (uint16_t)v;
-        __asm__ __volatile__ ("xchgw %1, %0"
-            : "=r" (vv) : "m" (*(uint16_t volatile*)a), "0" (vv));
-        *prev = vv;
-      } else if (size == 4) {
-        uint32_t vv = (uint32_t)v;
-        __asm__ __volatile__ ("xchgl %1, %0"
-            : "=r" (vv) : "m" (*(uint32_t volatile*)a), "0" (vv));
-        *prev = vv;
-      } else if (size == 8) {
-#ifdef __x86_64__
-        uint64_t vv = (uint64_t)v;
-        __asm__ __volatile__ ("xchgq %1, %0"
-            : "=r" (vv) : "m" (*(uint64_t volatile*)a), "0" (vv));
-        *prev = vv;
-#else
-#error "IMPLEMENT ME, PLZ"
-        //uint64_t cmp = *a;
-        //!!!while (!tsan_atomic64_compare_exchange_strong(a, &cmp, v, mo, mo))
-        //!!! {}
-#endif
-      }
-    } else {
-      if (size == 1) {
-        *(uint8_t volatile*)a = v;
-      } else if (size == 2) {
-        *(uint16_t volatile*)a = v;
-      } else if (size == 4) {
-        *(uint32_t volatile*)a = v;
-      } else if (size == 8) {
-        *(uint64_t volatile*)a = v;
-      }
-    }
-    return 0;
-
-  } else if (op == tsan_atomic_op_exchange) {
-    if (size == 1) {
-      uint8_t vv = (uint8_t)v;
-      __asm__ __volatile__ ("xchgb %1, %0"
-          : "=r" (vv) : "m" (*(uint8_t volatile*)a), "0" (vv));
-      *prev = vv;
-      return vv;
-    } else if (size == 2) {
-      uint16_t vv = (uint16_t)v;
-      __asm__ __volatile__ ("xchgw %1, %0"
-          : "=r" (vv) : "m" (*(uint16_t volatile*)a), "0" (vv));
-      *prev = vv;
-      return vv;
-    } else if (size == 4) {
-      uint32_t vv = (uint32_t)v;
-      __asm__ __volatile__ ("xchgl %1, %0"
-          : "=r" (vv) : "m" (*(uint32_t volatile*)a), "0" (vv));
-      *prev = vv;
-      return vv;
-    } else if (size == 8) {
-# ifdef __x86_64__
-      uint64_t vv = (uint64_t)v;
-      __asm__ __volatile__ ("xchgq %1, %0"
-          : "=r" (vv) : "m" (*(uint64_t volatile*)a), "0" (vv));
-      *prev = vv;
-      return vv;
-#else
-#error "IMPLEMENT ME, PLZ"
-      //uint64_t cmp = *a;
-      //while (!tsan_atomic64_compare_exchange_strong(a, &cmp, v, mo, mo))
-      // {}
-      //return cmp;
-#endif
-    }
-
-  } else if (op == tsan_atomic_op_fetch_add) {
-    if (size == 1) {
-      uint8_t prevv = __sync_fetch_and_add((uint8_t volatile*)a, (uint8_t)v);
-      *prev = prevv;
-      *newv = prevv + (uint8_t)v;
-      return prevv;
-    } else if (size == 2) {
-      uint16_t prevv = __sync_fetch_and_add(
-          (uint16_t volatile*)a, (uint16_t)v);
-      *prev = prevv;
-      *newv = prevv + (uint16_t)v;
-      return prevv;
-    } else if (size == 4) {
-      uint32_t prevv = __sync_fetch_and_add(
-          (uint32_t volatile*)a, (uint32_t)v);
-      *prev = prevv;
-      *newv = prevv + (uint32_t)v;
-      return prevv;
-    } else if (size == 8) {
-      uint64_t prevv = __sync_fetch_and_add(
-          (uint64_t volatile*)a, (uint64_t)v);
-      *prev = prevv;
-      *newv = prevv + v;
-      return prevv;
-    }
-
-  } else if (op == tsan_atomic_op_fetch_sub) {
-    if (size == 1) {
-      uint8_t prevv = __sync_fetch_and_sub(
-          (uint8_t volatile*)a, (uint8_t)v);
-      *prev = prevv;
-      *newv = prevv - (uint8_t)v;
-      return prevv;
-    } else if (size == 2) {
-      uint16_t prevv = __sync_fetch_and_sub(
-          (uint16_t volatile*)a, (uint16_t)v);
-      *prev = prevv;
-      *newv = prevv - (uint16_t)v;
-      return prevv;
-    } else if (size == 4) {
-      uint32_t prevv = __sync_fetch_and_sub(
-          (uint32_t volatile*)a, (uint32_t)v);
-      *prev = prevv;
-      *newv = prevv - (uint32_t)v;
-      return prevv;
-    } else if (size == 8) {
-      uint64_t prevv = __sync_fetch_and_sub(
-          (uint64_t volatile*)a, (uint64_t)v);
-      *prev = prevv;
-      *newv = prevv - v;
-      return prevv;
-    }
-
-  } else if (op == tsan_atomic_op_fetch_and) {
-    if (size == 1) {
-      uint8_t prevv = __sync_fetch_and_and(
-          (uint8_t volatile*)a, (uint8_t)v);
-      *prev = prevv;
-      *newv = prevv & (uint8_t)v;
-      return prevv;
-    } else if (size == 2) {
-      uint16_t prevv = __sync_fetch_and_and(
-          (uint16_t volatile*)a, (uint16_t)v);
-      *prev = prevv;
-      *newv = prevv & (uint16_t)v;
-      return prevv;
-    } else if (size == 4) {
-      uint32_t prevv = __sync_fetch_and_and(
-          (uint32_t volatile*)a, (uint32_t)v);
-      *prev = prevv;
-      *newv = prevv & (uint32_t)v;
-      return prevv;
-    } else if (size == 8) {
-      uint64_t prevv = __sync_fetch_and_and(
-          (uint64_t volatile*)a, (uint64_t)v);
-      *prev = prevv;
-      *newv = prevv & v;
-      return prevv;
-    }
-
-  } else if (op == tsan_atomic_op_fetch_xor) {
-    if (size == 1) {
-      uint8_t prevv = __sync_fetch_and_xor(
-          (uint8_t volatile*)a, (uint8_t)v);
-      *prev = prevv;
-      *newv = prevv ^ (uint8_t)v;
-      return prevv;
-    } else if (size == 2) {
-      uint16_t prevv = __sync_fetch_and_xor(
-          (uint16_t volatile*)a, (uint16_t)v);
-      *prev = prevv;
-      *newv = prevv ^ (uint16_t)v;
-      return prevv;
-    } else if (size == 4) {
-      uint32_t prevv = __sync_fetch_and_xor(
-          (uint32_t volatile*)a, (uint32_t)v);
-      *prev = prevv;
-      *newv = prevv ^ (uint32_t)v;
-      return prevv;
-    } else if (size == 8) {
-      uint64_t prevv = __sync_fetch_and_xor(
-          (uint64_t volatile*)a, (uint64_t)v);
-      *prev = prevv;
-      *newv = prevv ^ v;
-      return prevv;
-    }
-
-  } else if (op == tsan_atomic_op_fetch_or) {
-    if (size == 1) {
-      uint8_t prevv = __sync_fetch_and_or(
-          (uint8_t volatile*)a, (uint8_t)v);
-      *prev = prevv;
-      *newv = prevv | (uint8_t)v;
-      return prevv;
-    } else if (size == 2) {
-      uint16_t prevv = __sync_fetch_and_or(
-          (uint16_t volatile*)a, (uint16_t)v);
-      *prev = prevv;
-      *newv = prevv | (uint16_t)v;
-      return prevv;
-    } else if (size == 4) {
-      uint32_t prevv = __sync_fetch_and_or(
-          (uint32_t volatile*)a, (uint32_t)v);
-      *prev = prevv;
-      *newv = prevv | (uint32_t)v;
-      return prevv;
-    } else if (size == 8) {
-      uint64_t prevv = __sync_fetch_and_or(
-          (uint64_t volatile*)a, (uint64_t)v);
-      *prev = prevv;
-      *newv = prevv | v;
-      return prevv;
-    }
-
-  } else if (op == tsan_atomic_op_compare_exchange_strong
-          || op == tsan_atomic_op_compare_exchange_weak) {
-    uint64_t prevv = 0;
-    if (size == 1) {
-      prevv = __sync_val_compare_and_swap((uint8_t volatile*)a, cmp, v);
-    } else if (size == 2) {
-      prevv = __sync_val_compare_and_swap((uint16_t volatile*)a, cmp, v);
-    } else if (size == 4) {
-      prevv = __sync_val_compare_and_swap((uint32_t volatile*)a, cmp, v);
-    } else if (size == 8) {
-      prevv = __sync_val_compare_and_swap((uint64_t volatile*)a, cmp, v);
-    }
-    *prev = prevv;
-    return prevv;
-
-  } else if (op == tsan_atomic_op_fence) {
-    if (mo == tsan_memory_order_seq_cst)
-      __sync_synchronize();
-    return 0;
-  }
-
-  CHECK("unknown atomic operation" == 0);
-  return 0;
-}
-
-#else
-
-uint64_t tsan_atomic_do_op(tsan_atomic_op op,
-                           tsan_memory_order mo,
-                           tsan_memory_order fail_mo,
-                           size_t size,
-                           void volatile* a,
-                           uint64_t v,
-                           uint64_t cmp,
-                           uint64_t* newv,
-                           uint64_t* prev) {
-  CHECK(!"IMPLEMENTED" == 0);
-  return 0;
-}
-
-#endif
-
-
-
-
-
-
-
-
-
diff --git a/tsan/ts_atomic.h b/tsan/ts_atomic.h
deleted file mode 100644
index 168a931..0000000
--- a/tsan/ts_atomic.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* ThreadSanitizer
- * Copyright (c) 2011, Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TS_ATOMIC_H_INCLUDED
-#define TS_ATOMIC_H_INCLUDED
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-// These constants mostly mimic ones from C++0x standard draft.
-// The most recent version of the draft (as of now) can be found here:
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
-// Check out fresh versions here:
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/
-// Refer to sections 1.10 and 29.
-//
-// tsan_memory_order_invalid has no meaning other than invalid enum value.
-// tsan_memory_order_natomic stands for "non atomic" and expresses
-// as if plain memory access that is not intended to race
-// with other accesses.
-typedef enum tsan_memory_order {
-  tsan_memory_order_invalid = 0,
-  tsan_memory_order_natomic = 1 << 0,
-  tsan_memory_order_relaxed = 1 << 1,
-  tsan_memory_order_consume = 1 << 2,
-  tsan_memory_order_acquire = 1 << 3,
-  tsan_memory_order_release = 1 << 4,
-  tsan_memory_order_acq_rel = 1 << 5,
-  tsan_memory_order_seq_cst = 1 << 6
-} tsan_memory_order;
-
-
-// These constants express types of atomic memory operations
-// as defined by C++0x standard draft (section 29).
-//
-// tsan_atomic_op_invalid has no meaning other than invalid enum value.
-// compare_exchange_weak differs from compare_exchange_strong in that
-// it can fail spuriously.
-typedef enum tsan_atomic_op {
-  tsan_atomic_op_invalid = 0,
-  tsan_atomic_op_fence = 1 << 0,
-  tsan_atomic_op_load = 1 << 1,
-  tsan_atomic_op_store = 1 << 2,
-  tsan_atomic_op_exchange = 1 << 3,
-  tsan_atomic_op_fetch_add = 1 << 4,
-  tsan_atomic_op_fetch_sub = 1 << 5,
-  tsan_atomic_op_fetch_and  = 1 << 6,
-  tsan_atomic_op_fetch_xor = 1 << 7,
-  tsan_atomic_op_fetch_or = 1 << 8,
-  tsan_atomic_op_compare_exchange_weak = 1 << 9,
-  tsan_atomic_op_compare_exchange_strong = 1 << 10
-} tsan_atomic_op;
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // #ifndef TS_ATOMIC_H_INCLUDED
-
-
diff --git a/tsan/ts_atomic_int.h b/tsan/ts_atomic_int.h
deleted file mode 100644
index a973154..0000000
--- a/tsan/ts_atomic_int.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* ThreadSanitizer
- * Copyright (c) 2011, Google Inc. All rights reserved.
- * Author: Dmitry Vyukov (dvyukov)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TS_ATOMIC_INT_H_INCLUDED
-#define TS_ATOMIC_INT_H_INCLUDED
-
-#include "ts_atomic.h"
-#include "ts_util.h"
-#include <stddef.h>
-
-// Helper functions for atomic support
-
-char const* tsan_atomic_to_str(tsan_memory_order mo);
-char const* tsan_atomic_to_str(tsan_atomic_op op);
-bool tsan_atomic_is_acquire(tsan_memory_order mo);
-bool tsan_atomic_is_release(tsan_memory_order mo);
-bool tsan_atomic_is_rmw(tsan_atomic_op op);
-void tsan_atomic_verify(tsan_atomic_op op,
-                        tsan_memory_order mo,
-                        tsan_memory_order fail_mo,
-                        size_t size,
-                        void volatile* a);
-uint64_t tsan_atomic_do_op(tsan_atomic_op op,
-                           tsan_memory_order mo,
-                           tsan_memory_order fail_mo,
-                           size_t size,
-                           void volatile* a,
-                           uint64_t v,
-                           uint64_t cmp,
-                           uint64_t* newv,
-                           uint64_t* prev);
-
-#endif // #ifndef TS_ATOMIC_INT_H_INCLUDED
-
diff --git a/tsan/ts_dynamorio.cc b/tsan/ts_dynamorio.cc
deleted file mode 100644
index 87655e7..0000000
--- a/tsan/ts_dynamorio.cc
+++ /dev/null
@@ -1,507 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-
-// Some parts of the code in this file are taken from the examples
-// in DynamoRIO distribution, which have the following copyright.
-/* **********************************************************
- * Copyright (c) 2003-2008 VMware, Inc.  All rights reserved.
- * **********************************************************/
-
-/*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- *   this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright notice,
- *   this list of conditions and the following disclaimer in the documentation
- *   and/or other materials provided with the distribution.
- *
- * * Neither the name of VMware, Inc. nor the names of its contributors may be
- *   used to endorse or promote products derived from this software without
- *   specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
-
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-//
-// ******* WARNING ********
-// This code is experimental. Do not expect anything here to work.
-// ***** END WARNING ******
-
-#include "dr_api.h"
-
-#include "ts_util.h"
-
-#define EXTRA_REPLACE_PARAMS
-#define REPORT_READ_RANGE(a,b)
-#define REPORT_WRITE_RANGE(a,b)
-#include "ts_replace.h"
-
-#define Printf dr_printf
-
-static void *g_lock;
-static int   g_n_created_threads;
-
-typedef unordered_map<intptr_t, string> SymbolsTable;
-static SymbolsTable *sym_tab;
-
-string *g_main_module_path;
-
-//--------------- StackFrame ----------------- {{{1
-struct StackFrame {
-  uintptr_t pc;
-  uintptr_t sp;
-  StackFrame(uintptr_t p, uintptr_t s) : pc(p), sp(s) { }
-};
-
-
-//--------------- DrThread ----------------- {{{1
-struct DrThread {
-  int tid;  // A unique 0-based thread id.
-  vector<StackFrame> shadow_stack;
-};
-
-static DrThread &GetCurrentThread(void *drcontext) {
-  return *(DrThread*)dr_get_tls_field(drcontext);
-}
-
-//--------------- ShadowStack ----------------- {{{1
-#define DEB_PR (0 && t.tid == 1)
-
-static void PrintShadowStack(DrThread &t) {
-  Printf("T%d Shadow stack (%d)\n", t.tid, (int)t.shadow_stack.size());
-  for (int i = t.shadow_stack.size() - 1; i >= 0; i--) {
-    uintptr_t pc = t.shadow_stack[i].pc;
-    Printf("%s[%p]\n", g_main_module_path->c_str(), pc);
-  }
-  for (int i = t.shadow_stack.size() - 1; i >= 0; i--) {
-    uintptr_t pc = t.shadow_stack[i].pc;
-    uintptr_t sp = t.shadow_stack[i].sp;
-    Printf("  sp=%p pc=%p\n", sp, pc);
-  }
-}
-
-static void UpdateShadowStack(DrThread &t, uintptr_t sp) {
-  while (t.shadow_stack.size() > 0 && sp >= t.shadow_stack.back().sp) {
-    t.shadow_stack.pop_back();
-    if (DEB_PR) {
-      dr_mutex_lock(g_lock);
-      Printf("T%d PopShadowStack\n", t.tid);
-      PrintShadowStack(t);
-      dr_mutex_unlock(g_lock);
-    }
-  }
-}
-
-static void PushShadowStack(DrThread &t, uintptr_t pc, uintptr_t target_pc, uintptr_t sp) {
-  if (t.shadow_stack.size() > 0) {
-    t.shadow_stack.back().pc = pc;
-  }
-  t.shadow_stack.push_back(StackFrame(target_pc, sp));
-  if (DEB_PR) {
-    dr_mutex_lock(g_lock);
-    Printf("T%d PushShadowStack %p %p %d\n", t.tid, pc, target_pc, sp);
-    PrintShadowStack(t);
-    dr_mutex_unlock(g_lock);
-  }
-}
-
-//--------------- callbacks ----------------- {{{1
-static void OnEvent_ThreadInit(void *drcontext) {
-  DrThread *t_ptr = new DrThread;
-  DrThread &t = *t_ptr;
-
-  dr_mutex_lock(g_lock);
-  t.tid = g_n_created_threads++;
-  dr_mutex_unlock(g_lock);
-
-  dr_set_tls_field(drcontext, t_ptr);
-
-  dr_printf("T%d %s\n", t.tid, (char*)__FUNCTION__+8);
-}
-
-static void OnEvent_ThreadExit(void *drcontext) {
-  DrThread &t = GetCurrentThread(drcontext);
-  dr_printf("T%d %s\n", t.tid, (char*)__FUNCTION__+8);
-}
-
-void OnEvent_ModuleLoaded(void *drcontext, const module_data_t *info,
-                          bool loaded) {
-  // if this assertion fails, your DynamoRIO is too old. You need rev261 with some patches...
-  CHECK(info->full_path);
-
-  dr_printf("%s: %s (%s)\n", __FUNCTION__,
-            dr_module_preferred_name(info), info->full_path);
-  if (g_main_module_path == NULL) {
-    g_main_module_path = new string(info->full_path);
-  }
-}
-
-static void OnEvent_Exit(void) {
-  dr_printf("ThreadSanitizerDynamoRio: done\n");
-  dr_mutex_destroy(g_lock);
-}
-
-static void On_Mop(uintptr_t pc, size_t size, void *a, bool is_w) {
-  void *drcontext = dr_get_current_drcontext();
-  DrThread &t = GetCurrentThread(drcontext);
-  if (t.tid == 777) {
-    dr_fprintf(STDERR, "T%d pc=%p a=%p size=%ld %s\n", t.tid, pc, a, size, is_w ? "WRITE" : "READ");
-  }
-}
-
-static void On_Read(uintptr_t pc, size_t size, void *a) {
-  On_Mop(pc, size, a, false);
-}
-
-static void On_Write(uintptr_t pc, size_t size, void *a) {
-  On_Mop(pc, size, a, true);
-}
-
-static void On_AnyCall(uintptr_t pc, uintptr_t target_pc, uintptr_t sp, bool is_direct) {
-  void *drcontext = dr_get_current_drcontext();
-  DrThread &t = GetCurrentThread(drcontext);
-  // dr_fprintf(STDOUT, "T%d CALL %p => %p; sp=%p\n", t.tid, pc, target_pc, sp);
-  PushShadowStack(t, pc, target_pc, sp);
-}
-
-static void On_DirectCall(uintptr_t pc, uintptr_t target_pc, uintptr_t sp) {
-  On_AnyCall(pc, target_pc, sp, true);
-}
-
-static void On_IndirectCall(uintptr_t pc, uintptr_t target_pc, uintptr_t sp) {
-  On_AnyCall(pc, target_pc, sp, false);
-}
-
-static void On_TraceEnter(uintptr_t pc, uintptr_t sp) {
-  void *drcontext = dr_get_current_drcontext();
-  DrThread &t = GetCurrentThread(drcontext);
-  // dr_fprintf(STDOUT, "T%d TRACE:\n%p\n%p\n", t.tid, pc, sp);
-  UpdateShadowStack(t, sp);
-}
-
-//--------------- instrumentation ----------------- {{{1
-opnd_t opnd_create_base_disp_from_dst(opnd_t dst) {
-  return opnd_create_base_disp(opnd_get_base(dst),
-                               opnd_get_index(dst),
-                               opnd_get_scale(dst),
-                               opnd_get_disp(dst),
-                               OPSZ_lea);
-}
-
-static void InstrumentOneMop(void* drcontext, instrlist_t *bb,
-                             instr_t *instr, opnd_t opnd, bool is_w) {
-  //   opnd_disassemble(drcontext, opnd, 1);
-  //   dr_printf("  -- (%s opnd)\n", is_w ? "write" : "read");
-  void *callback = (void*)(is_w ? On_Write : On_Read);
-  int size = opnd_size_in_bytes(opnd_get_size(opnd));
-
-  instr_t *tmp_instr = NULL;
-  reg_id_t reg = REG_XAX;
-
-  /* save %xax */
-  dr_save_reg(drcontext, bb, instr, reg, SPILL_SLOT_2);
-
-  if (opnd_is_base_disp(opnd)) {
-    /* lea opnd => %xax */
-    opnd_set_size(&opnd, OPSZ_lea);
-    tmp_instr = INSTR_CREATE_lea(drcontext,
-                                 opnd_create_reg(reg),
-                                 opnd);
-  } else if(
-#ifdef X86_64
-      opnd_is_rel_addr(opnd) ||
-#endif
-      opnd_is_abs_addr(opnd)) {
-    tmp_instr = INSTR_CREATE_mov_imm(drcontext,
-                                     opnd_create_reg(reg),
-                                     OPND_CREATE_INTPTR(opnd_get_addr(opnd)));
-  }
-  if (tmp_instr) {
-    // CHECK(tmp_instr);
-    instrlist_meta_preinsert(bb, instr, tmp_instr);
-
-    /* clean call */
-    dr_insert_clean_call(drcontext, bb, instr, callback, false,
-                         3,
-                         OPND_CREATE_INTPTR(instr_get_app_pc(instr)),
-                         OPND_CREATE_INT32(size),
-                         opnd_create_reg(reg));
-    /* restore %xax */
-    dr_restore_reg(drcontext, bb, instr, REG_XAX, SPILL_SLOT_2);
-  } else {
-    dr_printf("%s ????????????????????\n", __FUNCTION__);
-  }
-}
-
-static void InstrumentMopInstruction(void *drcontext,
-                                     instrlist_t *bb, instr_t *instr) {
-  // reads:
-  for (int a = 0; a < instr_num_srcs(instr); a++) {
-    opnd_t curop = instr_get_src(instr, a);
-    if (opnd_is_memory_reference(curop)) {
-      InstrumentOneMop(drcontext, bb, instr, curop, false);
-    }
-  }
-  // writes:
-  for (int a = 0; a < instr_num_dsts(instr); a++) {
-    opnd_t curop = instr_get_dst(instr, a);
-    if (opnd_is_memory_reference(curop)) {
-      InstrumentOneMop(drcontext, bb, instr, curop, true);
-    }
-  }
-  //dr_printf("reads: %d writes: %d\n", n_reads, n_writes);
-}
-
-static void InstrumentInstruction(void *drcontext, instrlist_t *bb,
-                                  instr_t *instr) {
-  // instr_disassemble(drcontext, instr, 1);
-  // dr_printf("  -- \n");
-  if (instr_is_call_direct(instr)) {
-    dr_insert_call_instrumentation(drcontext, bb, instr,
-                                   (app_pc)On_DirectCall);
-  } else if (instr_is_call_indirect(instr)) {
-    dr_insert_mbr_instrumentation(drcontext, bb, instr,
-                                  (app_pc)On_IndirectCall, SPILL_SLOT_1);
-
-  } else if (instr_reads_memory(instr) || instr_writes_memory(instr)) {
-    InstrumentMopInstruction(drcontext, bb, instr);
-  }
-}
-
-static dr_emit_flags_t OnEvent_Trace(void *drcontext, void *tag,
-                                     instrlist_t *trace, bool translating) {
-  instr_t *first_instr = NULL;
-  for (instr_t *instr = instrlist_first(trace); instr != NULL;
-       instr = instr_get_next(instr)) {
-    if (instr_get_app_pc(instr)) {
-      first_instr = instr;
-      break;
-    }
-  }
-  if (first_instr) {
-    // instr_disassemble(drcontext, first_instr, 1);
-    // dr_printf("  -- in_trace %p\n", instr_get_app_pc(first_instr));
-    dr_insert_clean_call(drcontext, trace, first_instr,
-                         (void*)On_TraceEnter, false,
-                         2,
-                         OPND_CREATE_INTPTR(instr_get_app_pc(first_instr)),
-                         opnd_create_reg(REG_XSP)
-                         );
-  }
-  return DR_EMIT_DEFAULT;
-}
-
-int replace_foo(int i, int j, int k) {
-  dr_printf(" dy 'foo_replace'(%i, %i, %i)\n", i, j, k);
-  return 1;
-}
-
-typedef unordered_map<intptr_t, void*> FunctionsReplaceMap;
-static FunctionsReplaceMap *fun_replace_map;
-
-namespace wrap {
-
-int (*orig_foo)(int,int,int) = NULL;
-int in_wrapper = 0;  // TODO: Make it thread-local
-
-static int wrapped_foo(int i, int j, int k) {
-  in_wrapper = 1;
-
-  dr_printf(" dy 'foo_wrap'(%i, %i, %i)\n", i, j, k);
-  dr_printf("orig_foo = %p\n", orig_foo);
-  int ret = 13;
-  if (orig_foo != NULL)
-    ret = orig_foo(i, j, k) + 4200;
-  else
-    dr_printf("ERROR! orig_foo is not set!\n");/**/
-
-  in_wrapper = 0;
-  return ret;
-}
-
-int is_in_wrapper(int arg) {
-  // TODO: this may not work well with recursive functions
-  return in_wrapper;
-}
-}
-
-void print_bb(void* drcontext, instrlist_t *bb, const char * desc) {
-  dr_printf("==================\n");
-  dr_printf("%s:\n", desc);
-  for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
-    instr_disassemble(drcontext, i, 1);
-    dr_printf("\n");
-  }
-  dr_printf("==================\n");
-}
-
-static dr_emit_flags_t OnEvent_BB(void* drcontext, void *tag, instrlist_t *bb,
-                                  bool for_trace, bool translating) {
-  instr_t *first_instr = instrlist_first(bb);
-  app_pc pc = instr_get_app_pc(first_instr);
-  string symbol_name = "UNKNOWN";
-  if (sym_tab->find((intptr_t)pc) != sym_tab->end()) {
-    symbol_name = (*sym_tab)[(intptr_t)pc];
-    //dr_printf("Symbol = %s\n", symbol_name.c_str());
-  }
-
-  if (fun_replace_map->count((intptr_t)pc) > 0) {
-    // Replace client function with the function supplied by the tool.
-    // The logic is inspired by drmemory/replace.c
-    app_pc target_fun = (app_pc)(*fun_replace_map)[(intptr_t)pc];
-    const module_data_t *info = dr_lookup_module(pc);
-    dr_printf("REDIR: %s (from %s) redirected to %p\n",
-              symbol_name.c_str(), info->full_path, target_fun);
-
-    instrlist_clear(drcontext, bb);
-    instrlist_append(bb, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(target_fun)), pc));
-  } else {
-    if (StringMatch("*foo_to_wrap*", symbol_name)) {
-      const module_data_t *info = dr_lookup_module(pc);
-      dr_printf(" 'foo_to_wrap' entry point: bb %p, %s / %s\n", pc, dr_module_preferred_name(info), info->full_path);
-      wrap::orig_foo = (int (*)(int,int,int))(void*)pc;
-
-      //print_bb(drcontext, bb, "BEFORE");
-      // TODO: Use something more optimized than clean_call
-      dr_insert_clean_call(drcontext, bb, first_instr, (void*)wrap::is_in_wrapper,
-                           false, 1, OPND_CREATE_INTPTR(pc));
-      instr_t *opr_instr = INSTR_CREATE_test(drcontext, opnd_create_reg(REG_XAX),
-                                                        opnd_create_reg(REG_XAX));
-      instr_t *jne_instr = INSTR_CREATE_jcc(drcontext, OP_jz,
-                                            opnd_create_pc((app_pc)wrap::wrapped_foo));
-      instrlist_meta_preinsert(bb, first_instr, opr_instr);
-      instrlist_meta_preinsert(bb, first_instr, jne_instr);
-
-      //print_bb(drcontext, bb, "AFTER");
-    }
-
-    instr_t *instr, *next_instr;
-    for (instr = instrlist_first(bb); instr != NULL; instr = next_instr) {
-      next_instr = instr_get_next(instr);
-      if (instr_get_app_pc(instr))  // don't instrument non-app code
-        InstrumentInstruction(drcontext, bb, instr);
-    }
-
-
-    OnEvent_Trace(drcontext, tag, bb, translating);
-  }
-
-  return DR_EMIT_DEFAULT;
-}
-
-void ReadSymbolsTableFromFile(const char *filename) {
-  file_t f = dr_open_file(filename, DR_FILE_READ);
-  CHECK(f != INVALID_FILE);
-
-  const int BUFF_SIZE = 1 << 16;  // should be enough for testing
-  char buff[BUFF_SIZE];
-  dr_read_file(f, buff, BUFF_SIZE);
-  char *cur_line = buff;
-  while (*cur_line) {
-    char *next_line = strstr(cur_line, "\n");
-    if (next_line != NULL)
-      *next_line = 0;
-    char fun_name[1024];
-    char dummy;
-    void* pc;
-    sscanf(cur_line, "%p %c %s", &pc, &dummy, fun_name);
-    //dr_printf("%s => %p\n", fun_name, pc);
-    (*sym_tab)[(intptr_t)pc] = fun_name;
-
-    if (next_line == NULL) break;
-    cur_line = next_line + 1;
-  }
-
-}
-
-void ReplaceFunc3(void *img, void *rtn, string filter, void *fun_ptr) {
-  for (SymbolsTable::iterator i = sym_tab->begin(); i != sym_tab->end(); i++) {
-    if (StringMatch(filter, i->second))
-      (*fun_replace_map)[(intptr_t)i->first] = fun_ptr;
-  }
-}
-
-//--------------- dr_init ----------------- {{{1
-DR_EXPORT void dr_init(client_id_t id) {
-  sym_tab = new SymbolsTable;
-
-  // HACK doesn't work if multiple options are passed.
-  const char *opstr = dr_get_options(id);
-  dr_printf("Options: %s\n", opstr);
-  const char *fname = strstr(opstr, "--symbols=");
-  if (fname) {
-    ReadSymbolsTableFromFile(fname + 10);
-  }
-
-  // Register events.
-  dr_register_exit_event(OnEvent_Exit);
-  dr_register_bb_event(OnEvent_BB);
-  dr_register_trace_event(OnEvent_Trace);
-  dr_register_thread_init_event(OnEvent_ThreadInit);
-  dr_register_thread_exit_event(OnEvent_ThreadExit);
-  dr_register_module_load_event(OnEvent_ModuleLoaded);
-  g_lock = dr_mutex_create();
-
-  fun_replace_map = new FunctionsReplaceMap();
-  void *img = NULL, *rtn = NULL;
-  #define AFUNPTR void*
-  ReplaceFunc3(img, rtn, "memchr", (AFUNPTR)Replace_memchr);
-  ReplaceFunc3(img, rtn, "strchr", (AFUNPTR)Replace_strchr);
-  ReplaceFunc3(img, rtn, "index", (AFUNPTR)Replace_strchr);
-  ReplaceFunc3(img, rtn, "strchrnul", (AFUNPTR)Replace_strchrnul);
-  ReplaceFunc3(img, rtn, "strrchr", (AFUNPTR)Replace_strrchr);
-  ReplaceFunc3(img, rtn, "rindex", (AFUNPTR)Replace_strrchr);
-  ReplaceFunc3(img, rtn, "strlen", (AFUNPTR)Replace_strlen);
-  ReplaceFunc3(img, rtn, "memcpy", (AFUNPTR)Replace_memcpy);
-  ReplaceFunc3(img, rtn, "memmove", (AFUNPTR)Replace_memmove);
-  ReplaceFunc3(img, rtn, "memcmp", (AFUNPTR)Replace_memcmp);
-  ReplaceFunc3(img, rtn, "strcpy", (AFUNPTR)Replace_strcpy);
-  ReplaceFunc3(img, rtn, "stpcpy", (AFUNPTR)Replace_stpcpy);
-  ReplaceFunc3(img, rtn, "strncpy", (AFUNPTR)Replace_strncpy);
-  ReplaceFunc3(img, rtn, "strcmp", (AFUNPTR)Replace_strcmp);
-  ReplaceFunc3(img, rtn, "strncmp", (AFUNPTR)Replace_strncmp);
-  ReplaceFunc3(img, rtn, "strcat", (AFUNPTR)Replace_strcat);
-  ReplaceFunc3(img, rtn, "*foo_to_replace*", (AFUNPTR)replace_foo);
-}
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/ts_event_names.h b/tsan/ts_event_names.h
deleted file mode 100644
index b8d4a89..0000000
--- a/tsan/ts_event_names.h
+++ /dev/null
@@ -1,57 +0,0 @@
-static const char *kEventNames[] =  {
-  "NOOP",               // Should not appear.
-  "READ",               // {tid, pc, addr, size}
-  "WRITE",              // {tid, pc, addr, size}
-  "READER_LOCK",        // {tid, pc, lock, 0}
-  "WRITER_LOCK",        // {tid, pc, lock, 0}
-  "UNLOCK",             // {tid, pc, lock, 0}
-  "UNLOCK_OR_INIT",     // {tid, pc, lock, 0}
-  "LOCK_CREATE",        // {tid, pc, lock, 0}
-  "LOCK_DESTROY",       // {tid, pc, lock, 0}
-  "THR_CREATE_BEFORE",  // Parent thread's event. {tid, pc, 0, 0}
-  "THR_CREATE_AFTER",   // Parent thread's event. {tid, 0, 0, child_tid}
-  "THR_START",          // Child thread's event {tid, CallStack, 0, parent_tid}
-  "THR_FIRST_INSN",     // Used only by valgrind.
-  "THR_END",            // {tid, 0, 0, 0}
-  "THR_JOIN_AFTER",     // {tid, pc, joined_tid}
-  "THR_STACK_TOP",      // {tid, pc, stack_top, stack_size_if_known}
-  "RTN_EXIT",           // {tid, 0, 0, 0}
-  "RTN_CALL",           // {tid, pc, 0, 0}
-  "SBLOCK_ENTER",       // {tid, pc, 0, 0}
-  "SIGNAL",             // {tid, pc, obj, 0}
-  "WAIT",               // {tid, pc, obj, 0}
-  "CYCLIC_BARRIER_INIT",         // {tid, pc, obj, n}
-  "CYCLIC_BARRIER_WAIT_BEFORE",  // {tid, pc, obj, 0}
-  "CYCLIC_BARRIER_WAIT_AFTER",   // {tid, pc, obj, 0}
-  "PCQ_CREATE",         // {tid, pc, pcq_addr, 0}
-  "PCQ_DESTROY",        // {tid, pc, pcq_addr, 0}
-  "PCQ_PUT",            // {tid, pc, pcq_addr, 0}
-  "PCQ_GET",            // {tid, pc, pcq_addr, 0}
-  "STACK_MEM_DIE",      // deprecated.
-  "MALLOC",             // {tid, pc, addr, size}
-  "FREE",               // {tid, pc, addr, 0}
-  "MMAP",               // {tid, pc, addr, size}
-  "MUNMAP",             // {tid, pc, addr, size}
-  "PUBLISH_RANGE",      // may be deprecated later.
-  "UNPUBLISH_RANGE",    // deprecated. TODO(kcc): get rid of this.
-  "HB_LOCK",            // {tid, pc, addr, 0}
-  "NON_HB_LOCK",        // {tid, pc, addr, 0}
-  "IGNORE_READS_BEG",   // {tid, pc, 0, 0}
-  "IGNORE_READS_END",   // {tid, pc, 0, 0}
-  "IGNORE_WRITES_BEG",  // {tid, pc, 0, 0}
-  "IGNORE_WRITES_END",  // {tid, pc, 0, 0}
-  "SET_THREAD_NAME",    // {tid, pc, name_str, 0}
-  "SET_LOCK_NAME",      // {tid, pc, lock, lock_name_str}
-  "TRACE_MEM",          // {tid, pc, addr, 0}
-  "EXPECT_RACE",        // {tid, descr_str, ptr, size}
-  "BENIGN_RACE",        // {tid, descr_str, ptr, size}
-  "EXPECT_RACE_BEGIN",  // {tid, pc, 0, 0}
-  "EXPECT_RACE_END",    // {tid, pc, 0, 0}
-  "VERBOSITY",          // Used for debugging.
-  "STACK_TRACE",        // {tid, pc, 0, 0}, for debugging.
-  "FLUSH_STATE",        // {tid, pc, 0, 0}
-  "PC_DESCRIPTION",     // {0, pc, descr_str, 0}, for ts_offline.
-  "PRINT_MESSAGE",      // {tid, pc, message_str, 0}, for ts_offline.
-  "FLUSH_EXPECTED_RACES",  // {0, 0, 0, 0}
-  "LAST_EVENT"          // Should not appear.
-};
diff --git a/tsan/ts_events.h b/tsan/ts_events.h
deleted file mode 100644
index 97f6c5b..0000000
--- a/tsan/ts_events.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-
-#ifndef TS_EVENTS_H_
-#define TS_EVENTS_H_
-
-// Each event contains tid (the id of the current thread).
-// Most events contain pc (the program counter).
-// Some events contain:
-//  * addr, a memory address, a lock address, etc
-//  * size of a memory range
-// Few events contain a string (e.g. SET_THREAD_NAME).
-
-enum EventType {
-  NOOP,               // Should not appear.
-  READ,               // {tid, pc, addr, size}
-  WRITE,              // {tid, pc, addr, size}
-  READER_LOCK,        // {tid, pc, lock, 0}
-  WRITER_LOCK,        // {tid, pc, lock, 0}
-  UNLOCK,             // {tid, pc, lock, 0}
-  UNLOCK_OR_INIT,     // {tid, pc, lock, 0}
-  LOCK_CREATE,        // {tid, pc, lock, 0}
-  LOCK_DESTROY,       // {tid, pc, lock, 0}
-  THR_CREATE_BEFORE,  // Parent thread's event. {tid, pc, 0, 0}
-  THR_CREATE_AFTER,   // Parent thread's event. {tid, 0, 0, child_tid}
-  THR_START,          // Child thread's event {tid, CallStack, 0, parent_tid}
-  THR_FIRST_INSN,     // Used only by valgrind.
-  THR_END,            // {tid, 0, 0, 0}
-  THR_JOIN_AFTER,     // {tid, pc, joined_tid}
-  THR_STACK_TOP,      // {tid, pc, stack_top, stack_size_if_known}
-  RTN_EXIT,           // {tid, 0, 0, 0}
-  RTN_CALL,           // {tid, pc, 0, 0}
-  SBLOCK_ENTER,       // {tid, pc, 0, 0}
-  SIGNAL,             // {tid, pc, obj, 0}
-  WAIT,               // {tid, pc, obj, 0}
-  CYCLIC_BARRIER_INIT,         // {tid, pc, obj, n}
-  CYCLIC_BARRIER_WAIT_BEFORE,  // {tid, pc, obj, 0}
-  CYCLIC_BARRIER_WAIT_AFTER,   // {tid, pc, obj, 0}
-  PCQ_CREATE,         // {tid, pc, pcq_addr, 0}
-  PCQ_DESTROY,        // {tid, pc, pcq_addr, 0}
-  PCQ_PUT,            // {tid, pc, pcq_addr, 0}
-  PCQ_GET,            // {tid, pc, pcq_addr, 0}
-  STACK_MEM_DIE,      // deprecated.
-  MALLOC,             // {tid, pc, addr, size}
-  FREE,               // {tid, pc, addr, 0}
-  MMAP,               // {tid, pc, addr, size}
-  MUNMAP,             // {tid, pc, addr, size}
-  PUBLISH_RANGE,      // may be deprecated later.
-  UNPUBLISH_RANGE,    // deprecated. TODO(kcc): get rid of this.
-  HB_LOCK,            // {tid, pc, addr, 0}
-  NON_HB_LOCK,        // {tid, pc, addr, 0}
-  IGNORE_READS_BEG,   // {tid, pc, 0, 0}
-  IGNORE_READS_END,   // {tid, pc, 0, 0}
-  IGNORE_WRITES_BEG,  // {tid, pc, 0, 0}
-  IGNORE_WRITES_END,  // {tid, pc, 0, 0}
-  SET_THREAD_NAME,    // {tid, pc, name_str, 0}
-  SET_LOCK_NAME,      // {tid, pc, lock, lock_name_str}
-  TRACE_MEM,          // {tid, pc, addr, 0}
-  EXPECT_RACE,        // {tid, descr_str, ptr, size}
-  BENIGN_RACE,        // {tid, descr_str, ptr, size}
-  EXPECT_RACE_BEGIN,  // {tid, pc, 0, 0}
-  EXPECT_RACE_END,    // {tid, pc, 0, 0}
-  VERBOSITY,          // Used for debugging.
-  STACK_TRACE,        // {tid, pc, 0, 0}, for debugging.
-  FLUSH_STATE,        // {tid, pc, 0, 0}
-  PC_DESCRIPTION,     // {0, pc, descr_str, 0}, for ts_offline.
-  PRINT_MESSAGE,      // {tid, pc, message_str, 0}, for ts_offline.
-  FLUSH_EXPECTED_RACES,  // {0, 0, 0, 0}
-  LAST_EVENT          // Should not appear.
-};
-
-#include "ts_event_names.h"  // generated from this file by sed.
-
-class Event {
- public:
-  Event(EventType type, int32_t tid, uintptr_t pc, uintptr_t a, uintptr_t info)
-      : type_(type),
-      tid_(tid),
-      pc_(pc),
-      a_(a),
-      info_(info) {
-      }
-  Event() {}  // Not initialized.
-
-  void Init(EventType type, int32_t tid, uintptr_t pc, uintptr_t a, uintptr_t info) {
-    type_ = type;
-    tid_  = tid;
-    pc_   = pc;
-    a_    = a;
-    info_ = info;
-  }
-
-
-  EventType type()  const { return type_; }
-  int32_t   tid()   const { return tid_; }
-  uintptr_t a()     const { return a_; }
-  uintptr_t pc()    const { return pc_; }
-  uintptr_t info()  const { return info_; }
-  void      Print() const {
-    Printf("T%d: %s [pc=%p; a=%p; i=%p]\n",
-           tid(), TypeString(type()), pc(), a(), info());
-
-  }
-  static const char *TypeString(EventType type) {
-    return kEventNames[type];
-  }
- private:
-  EventType      type_;
-  int32_t   tid_;
-  uintptr_t pc_;
-  uintptr_t a_;
-  uintptr_t info_;
-};
-
-
-// end. {{{1
-#endif  // TS_EVENTS_H_
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_heap_info.h b/tsan/ts_heap_info.h
deleted file mode 100644
index 3681862..0000000
--- a/tsan/ts_heap_info.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-#ifndef TS_HEAP_INFO_
-#define TS_HEAP_INFO_
-
-#include "ts_util.h"
-
-// Information about heap memory.
-// For each heap allocation we create a struct HeapInfo.
-// This struct should have fields 'uintptr_t ptr' and 'uintptr_t size',
-// a default CTOR and a copy CTOR.
-
-template<class HeapInfo>
-class HeapMap {
- public:
-  typedef map<uintptr_t, HeapInfo> map_t;
-  typedef typename map_t::iterator iterator;
-
-  HeapMap() { Reset(); }
-
-  iterator begin() { return ++map_.begin(); }
-  iterator end() { return --map_.end(); }
-
-  size_t size() { return map_.size() - 2; }
-
-  void InsertInfo(uintptr_t a, HeapInfo info) {
-    CHECK(IsValidPtr(a));
-    CHECK(info.ptr == a);
-    map_[a] = info;
-  }
-
-  void EraseInfo(uintptr_t a) {
-    CHECK(IsValidPtr(a));
-    map_.erase(a);
-  }
-
-  void EraseRange(uintptr_t start, uintptr_t end) {
-    CHECK(IsValidPtr(start));
-    CHECK(IsValidPtr(end));
-    // TODO(glider): the [start, end) range may cover several map_ records.
-    EraseInfo(start);
-  }
-
-  HeapInfo *GetInfo(uintptr_t a) {
-    CHECK(this);
-    CHECK(IsValidPtr(a));
-    typename map_t::iterator it = map_.lower_bound(a);
-    CHECK(it != map_.end());
-    if (it->second.ptr == a) {
-      // Exact match. 'a' is the beginning of a heap-allocated address.
-      return &it->second;
-    }
-    CHECK(a < it->second.ptr);
-    CHECK(it != map_.begin());
-    // not an exact match, try the previous iterator.
-    --it;
-    HeapInfo *info = &it->second;
-    CHECK(info->ptr < a);
-    if (info->ptr + info->size > a) {
-      // within the range.
-      return info;
-    }
-    return NULL;
-  }
-
-  void Clear() {
-    map_.clear();
-    Reset();
-  }
-
- private:
-  bool IsValidPtr(uintptr_t a) {
-    return a != 0 && a != (uintptr_t) -1;
-  }
-  void Reset() {
-    // Insert a maximal and minimal possible values to make GetInfo simpler.
-    HeapInfo max_info;
-    memset(&max_info, 0, sizeof(HeapInfo));
-    max_info.ptr = (uintptr_t)-1;
-    map_[max_info.ptr] = max_info;
-
-    HeapInfo min_info;
-    memset(&min_info, 0, sizeof(HeapInfo));
-    map_[min_info.ptr] = min_info;
-  }
-  map_t map_;
-};
-
-#endif  // TS_HEAP_INFO_
diff --git a/tsan/ts_lock.h b/tsan/ts_lock.h
deleted file mode 100644
index 70f10b9..0000000
--- a/tsan/ts_lock.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-
-#ifndef TS_LOCK_H_
-#define TS_LOCK_H_
-
-#include "ts_util.h"
-
-#if (DEBUG > 0) && (TS_SERIALIZED == 0) && defined (TS_LLVM) && !defined(DYNAMIC_ANNOTATIONS_ENABLED)
-# define DYNAMIC_ANNOTATIONS_ENABLED 1
-#endif
-#include "dynamic_annotations.h"
-
-//--------- Simple Lock ------------------ {{{1
-#if defined(TS_VALGRIND) || defined(TS_OFFLINE)
-class TSLock {
- public:
-  void Lock() {};
-  void Unlock() {};
-  void AssertHeld() {};
-};
-#else
-class TSLock {
- public:
-  TSLock();
-  ~TSLock();
-  void Lock();
-  void Unlock();
-  void AssertHeld();
- private:
-  struct Rep;
-  Rep *rep_;
-};
-#endif
-
-class ScopedLock {
- public:
-  ScopedLock(TSLock *lock)
-    : lock_(lock) {
-    lock_->Lock();
-  }
-  ~ScopedLock() { lock_->Unlock(); }
- private:
-  TSLock *lock_;
-};
-
-//--------- Atomic operations {{{1
-#if TS_SERIALIZED == 1
-// No need for atomics when all ThreadSanitizer logic is serialized.
-ALWAYS_INLINE uintptr_t AtomicExchange(uintptr_t *ptr, uintptr_t new_value) {
-  uintptr_t old_value = *ptr;
-  *ptr = new_value;
-  return old_value;
-}
-
-ALWAYS_INLINE void ReleaseStore(uintptr_t *ptr, uintptr_t value) {
-  *ptr = value;
-}
-
-ALWAYS_INLINE int32_t NoBarrier_AtomicIncrement(int32_t* ptr) {
-  return *ptr += 1;
-}
-
-ALWAYS_INLINE int32_t NoBarrier_AtomicDecrement(int32_t* ptr) {
-  return *ptr -= 1;
-}
-
-#elif defined(__GNUC__)
-
-ALWAYS_INLINE uintptr_t AtomicExchange(uintptr_t *ptr, uintptr_t new_value) {
-  return __sync_lock_test_and_set(ptr, new_value);
-}
-
-ALWAYS_INLINE void ReleaseStore(uintptr_t *ptr, uintptr_t value) {
-  __asm__ __volatile__("" : : : "memory");
-  *(volatile uintptr_t*)ptr = value;
-}
-
-ALWAYS_INLINE int32_t NoBarrier_AtomicIncrement(int32_t* ptr) {
-  return __sync_add_and_fetch(ptr, 1);
-}
-
-ALWAYS_INLINE int32_t NoBarrier_AtomicDecrement(int32_t* ptr) {
-  return __sync_sub_and_fetch(ptr, 1);
-}
-
-#elif defined(_MSC_VER)
-uintptr_t AtomicExchange(uintptr_t *ptr, uintptr_t new_value);
-void ReleaseStore(uintptr_t *ptr, uintptr_t value);
-int32_t NoBarrier_AtomicIncrement(int32_t* ptr);
-int32_t NoBarrier_AtomicDecrement(int32_t* ptr);
-
-#else
-# error "unsupported configuration"
-#endif
-
-
-ALWAYS_INLINE int32_t AtomicIncrementRefcount(int32_t *refcount) {
-  return NoBarrier_AtomicIncrement(refcount);
-}
-
-ALWAYS_INLINE int32_t AtomicDecrementRefcount(int32_t *refcount) {
-  ANNOTATE_HAPPENS_BEFORE(refcount);
-  int32_t res = NoBarrier_AtomicDecrement(refcount);
-  if (res == 0) {
-    ANNOTATE_HAPPENS_AFTER(refcount);
-  }
-  return res;
-}
-
-
-
-// end. {{{1
-#endif  // TS_LOCK_H_
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_offline.cc b/tsan/ts_offline.cc
deleted file mode 100644
index 6abc177..0000000
--- a/tsan/ts_offline.cc
+++ /dev/null
@@ -1,385 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-
-// Experimental off-line race detector.
-// Reads program events from a file and detects races.
-// See http://code.google.com/p/data-race-test
-
-// ------------- Includes ------------- {{{1
-#include "thread_sanitizer.h"
-#include "ts_events.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-#include <ctype.h>
-#include <time.h>
-
-// ------------- Globals ------------- {{{1
-static map<string, int> *g_event_type_map;
-struct PcInfo {
-  string img_name;
-  string file_name;
-  string rtn_name;
-  int line;
-};
-
-static map<uintptr_t, PcInfo> *g_pc_info_map;
-
-unsigned long offline_line_n;
-//------------- Read binary file Utils ------------ {{{1
-static const int kBufSize = 65536;
-
-template<typename T>
-static bool Read(FILE *fp, T *res) {
-  unsigned char buf[16];
-  int size = fread(buf, sizeof(T), 1, fp);
-  *res = 0;
-  for (unsigned int i=0; i<sizeof(T); i++) {
-    *res <<= 8;
-    *res += buf[i];
-  }
-  return size == 1;
-}
-
-
-static bool ReadANSI(FILE *file, string *res) {
-  char buf[kBufSize];
-  unsigned short length;
-  if (!Read<unsigned short>(file, &length)) {
-    return false;
-  }
-  int size = fread(buf, 1, (int)length, file);
-  buf[length] = 0;
-  *res = (char *)buf;
-  return size == length;
-}
-//------------- Utils ------------------- {{{1
-static EventType EventNameToEventType(const char *name) {
-  map<string, int>::iterator it = g_event_type_map->find(name);
-  if (it == g_event_type_map->end()) {
-    Printf("Unknown event type: %s\n", name);
-  }
-  CHECK(it != g_event_type_map->end());
-  return (EventType)it->second;
-}
-
-static void InitEventTypeMap() {
-  g_event_type_map = new map<string, int>;
-  for (int i = 0; i < LAST_EVENT; i++) {
-    (*g_event_type_map)[kEventNames[i]] = i;
-  }
-}
-
-static void SkipCommentText(FILE *file) {
-  char buff[kBufSize];
-  int i = 0;
-  while (true) {
-    int c = fgetc(file);
-    if (c == EOF) break;
-    if (c == '\n') {
-      offline_line_n++;
-      break;
-    }
-    if (i < kBufSize - 1)
-      buff[i++] = c;
-  }
-  buff[i] = 0;
-  if (buff[0] == 'P' && buff[1] == 'C') {
-    char img[kBufSize];
-    char rtn[kBufSize];
-    char file[kBufSize];
-    int line = 0;
-    unsigned long pc = 0;
-    if (sscanf(buff, "PC %lx %s %s %s %d", (unsigned long*)&pc,
-               img, rtn, file, &line) == 5 &&
-        pc != 0) {
-      CHECK(g_pc_info_map);
-      PcInfo pc_info;
-      pc_info.img_name = img;
-      pc_info.rtn_name = rtn;
-      pc_info.file_name = file;
-      pc_info.line = line;
-      (*g_pc_info_map)[pc] = pc_info;
-      // Printf("***** PC %lx %s\n", pc, rtn);
-    }
-  }
-  if (buff[0] == '>') {
-    // Just print the rest of comment.
-    Printf("%s\n", buff + 2);
-  }
-}
-
-static void SkipWhiteSpaceAndComments(FILE *file) {
-  int c = 0;
-  while (true) {
-    c = fgetc(file);
-    if (c == EOF) return;
-    if (c == '#' || c == '=') {
-      SkipCommentText(file);
-      continue;
-    }
-    if (isspace(c)) continue;
-    break;
-  }
-  ungetc(c, file);
-}
-
-typedef bool (*EventReader)(FILE *, Event *);
-
-bool ReadOneStrEventFromFile(FILE *file, Event *event) {
-  CHECK(event);
-  char name[1024];
-  uint32_t tid;
-  unsigned long pc, a, info;
-  SkipWhiteSpaceAndComments(file);
-  offline_line_n++;
-  if (5 == fscanf(file, "%s%x%lx%lx%lx", name, &tid, &pc, &a, &info)) {
-    event->Init(EventNameToEventType(name), tid, pc, a, info);
-    return true;
-  }
-  return false;
-}
-
-bool ProcessCodePosition(FILE *input, int *pc, string *str) {
-  bool ok = Read<int>(input, pc);
-  ok &= ReadANSI(input, str);
-  return ok;
-}
-
-bool ProcessMessage(FILE *input, string *str) {
-  return ReadANSI(input, str);
-}
-
-// Read information about event in format: [[[info] address] pc] tid.
-bool ProcessEvent(FILE *input, EventType type, Event *event) {
-  bool ok = true;
-  unsigned short tid = 0;
-  int pc = 0;
-  int64_t address = 0;
-  unsigned short extra = 0;
-  // It's tricky switch without breaks.
-  switch (type) {
-    case THR_START:
-      ok &= Read<unsigned short>(input, &extra);
-      // fallthrough.
-    case READ:
-    case READER_LOCK:
-    case SIGNAL:
-    case THR_JOIN_AFTER:
-    case UNLOCK:
-    case WAIT:
-    case WRITE:
-    case WRITER_LOCK:
-      ok &= Read<int64_t>(input, &address);
-      // fallthrough.
-    case EXPECT_RACE_BEGIN:
-    case EXPECT_RACE_END:
-    case RTN_EXIT:
-    case SBLOCK_ENTER:
-    case STACK_TRACE:
-    case THR_END:
-    case THR_FIRST_INSN:
-      ok &= Read<int>(input, &pc);
-      // fallthrough.
-    case RTN_CALL:
-      ok &= Read<unsigned short>(input, &tid);
-      break;
-    default:
-      // read unsupported EventType.
-      Printf("Unsupported EventType %s %d\n", type, (int)type);
-      CHECK(false);
-  }
-  if (type == READ || type == WRITE) {
-    extra = 1;
-  }
-  event->Init(type, (int)tid, pc, address, (int)extra);
-  return ok;
-}
-
-bool ReadOneBinEventFromFile(FILE *input, Event *event) {
-  CHECK(event);
-  bool ok = true;
-  EventType type;
-  unsigned char typeOrd;
-  int pc;
-  int line;
-  char rtn[kBufSize];
-  char file[kBufSize];
-  string str;
-  while (ok) {
-    offline_line_n++;
-    ok &= Read<unsigned char>(input, &typeOrd);
-    if (!ok) break;
-    type = (EventType)typeOrd;
-    switch (type) {
-      case PC_DESCRIPTION:
-        ok &= ProcessCodePosition(input, &pc, &str);
-        if (sscanf(str.c_str(), "%s %s %d", rtn, file, &line) == 3 && pc != 0) {
-          CHECK(g_pc_info_map);
-          PcInfo pc_info;
-          pc_info.img_name = "java";
-          pc_info.rtn_name = rtn;
-          pc_info.file_name = file;
-          pc_info.line = line;
-          (*g_pc_info_map)[pc] = pc_info;
-        }
-        break;
-      case PRINT_MESSAGE:
-        ok &= ProcessMessage(input, &str);
-        // Just print the rest of comment.
-        Printf("%s\n", str.c_str());
-        break;
-      default:
-        ok &= ProcessEvent(input, type, event);
-        return ok;
-    }
-  }
-  return false;
-}
-
-void DecodeEventsFromFile(FILE *input, FILE *output) {
-  offline_line_n = 0;
-  bool ok = true;
-  EventType type;
-  unsigned char typeOrd;
-  int pc;
-  string str;
-  Event event;
-  while (ok) {
-    ok &= Read<unsigned char>(input, &typeOrd);
-    if (!ok) break;
-    type = (EventType)typeOrd;
-    switch (type) {
-      case PC_DESCRIPTION:
-        ok &= ProcessCodePosition(input, &pc, &str);
-        fprintf(output, "#PC %x java %s\n", pc, str.c_str());
-        break;
-      case PRINT_MESSAGE:
-        ok &= ProcessMessage(input, &str);
-        fprintf(output, "#> %s\n", str.c_str());
-        break;
-      default:
-        ok &= ProcessEvent(input, type, &event);
-        fprintf(output, "%s %x %x %lx %lx\n", kEventNames[event.type()],
-            event.tid(), (unsigned int)event.pc(),
-            (long unsigned int)event.a(), (long unsigned int)event.info());
-        break;
-    }
-    offline_line_n++;
-  }
-  Printf("INFO: ThreadSanitizer write %ld lines.\n", offline_line_n);
-}
-
-static const uint32_t max_unknown_thread = 10000;
-
-static bool known_threads[max_unknown_thread] = {};
-
-INLINE void ReadEventsFromFile(FILE *file, EventReader event_reader_cb) {
-  Event event;
-  uint64_t n_events = 0;
-  offline_line_n = 0;
-  while (event_reader_cb(file, &event)) {
-    //event.Print();
-    n_events++;
-    uint32_t tid = event.tid();
-    if (event.type() == THR_START && tid < max_unknown_thread) {
-      known_threads[tid] = true;
-    }
-    if (tid >= max_unknown_thread || known_threads[tid]) {
-      ThreadSanitizerHandleOneEvent(&event);
-    }
-  }
-  Printf("INFO: ThreadSanitizerOffline: %ld events read\n", n_events);
-}
-//------------- ThreadSanitizer exports ------------ {{{1
-
-void PcToStrings(uintptr_t pc, bool demangle,
-                string *img_name, string *rtn_name,
-                string *file_name, int *line_no) {
-  if (g_pc_info_map->count(pc) == 0) {
-    *img_name = "";
-    *rtn_name = "";
-    *file_name = "";
-    *line_no = 0;
-    return;
-  }
-  PcInfo &info = (*g_pc_info_map)[pc];
-  *img_name = info.img_name;
-  *rtn_name = info.rtn_name;
-  *file_name = info.file_name;
-  *line_no = info.line;
-  if (*file_name == "unknown")
-    *file_name = "";
-}
-
-string PcToRtnName(uintptr_t pc, bool demangle) {
-  string img, rtn, file;
-  int line;
-  PcToStrings(pc, demangle, &img, &rtn, &file, &line);
-  return rtn;
-}
-//------------- main ---------------------------- {{{1
-int main(int argc, char *argv[]) {
-  Printf("INFO: ThreadSanitizerOffline r%s\n", TS_VERSION);
-
-  InitEventTypeMap();
-  g_pc_info_map = new map<uintptr_t, PcInfo>;
-  G_flags = new FLAGS;
-
-  vector<string> args(argv + 1, argv + argc);
-  ThreadSanitizerParseFlags(&args);
-  ThreadSanitizerInit();
-
-  CHECK(G_flags);
-  if (G_flags->input_type == "bin") {
-    ReadEventsFromFile(stdin, ReadOneBinEventFromFile);
-  } else if (G_flags->input_type == "decode") {
-    FILE* output;
-    if (G_flags->log_file.size() > 0) {
-      output = fopen(G_flags->log_file.c_str(), "w");
-    } else {
-      output = stdout;
-    }
-    DecodeEventsFromFile(stdin, output);
-  } else if (G_flags->input_type == "str") {
-    ReadEventsFromFile(stdin, ReadOneStrEventFromFile);
-  } else {
-    Printf("Error: Unknown input_type value %s\n", G_flags->input_type.c_str());
-    exit(5);
-  }
-
-  ThreadSanitizerFini();
-  if (G_flags->error_exitcode && GetNumberOfFoundErrors() > 0) {
-    return G_flags->error_exitcode;
-  }
-}
-
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_pin.cc b/tsan/ts_pin.cc
deleted file mode 100644
index 80c08c1..0000000
--- a/tsan/ts_pin.cc
+++ /dev/null
@@ -1,3662 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-
-#define __STDC_LIMIT_MACROS
-#include "pin.H"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <map>
-#include <assert.h>
-
-#include "thread_sanitizer.h"
-#include "ts_lock.h"
-#include "ts_trace_info.h"
-#include "ts_race_verifier.h"
-#include "common_util.h"
-
-
-#if defined(__GNUC__)
-# include <cxxabi.h>  // __cxa_demangle
-# define ATOMIC_READ(a) __sync_add_and_fetch(a, 0)
-
-#elif defined(_MSC_VER)
-namespace WINDOWS
-{
-// This is the way of including winows.h recommended by PIN docs.
-#include<Windows.h>
-}
-
-#include <intrin.h>
-# define popen(x,y) (NULL)
-# define ATOMIC_READ(a)         _InterlockedCompareExchange(a, 0, 0)
-# define usleep(x) WINDOWS::Sleep((x)/1000)
-# define UINTPTR_MAX ((uintptr_t)-1)
-#endif
-
-#ifdef NDEBUG
-# error "Please don't define NDEBUG"
-#endif
-
-static void DumpEvent(CONTEXT *ctx, EventType type, int32_t tid, uintptr_t pc,
-                      uintptr_t a, uintptr_t info);
-//------ Global PIN lock ------- {{{1
-class ScopedReentrantClientLock {
- public:
-  ScopedReentrantClientLock(int line)
-    : line_(line) {
-    // if (line && G_flags->debug_level >= 5)  Printf("??Try  at line %d\n", line);
-    PIN_LockClient();
-    if (line && G_flags->debug_level >= 5)  Printf("++Lock at line %d\n", line);
-  }
-  ~ScopedReentrantClientLock() {
-    if (line_ && G_flags->debug_level >= 5) Printf("--Unlock at line %d\n", line_);
-    PIN_UnlockClient();
-  }
- private:
-  int line_;
-};
-
-//--------------- Globals ----------------- {{{1
-extern FILE *G_out;
-
-// Number of threads created by pthread_create (i.e. not counting main thread).
-static int n_created_threads = 0;
-// Number of started threads, i.e. the number of CallbackForThreadStart calls.
-static int n_started_threads = 0;
-
-const uint32_t kMaxThreads = PIN_MAX_THREADS;
-
-// Serializes the ThreadSanitizer callbacks if TS_SERIALIZED==1
-static TSLock g_main_ts_lock;
-
-// Serializes calls to pthread_create and CreateThread.
-static TSLock g_thread_create_lock;
-// Under g_thread_create_lock.
-static THREADID g_tid_of_thread_which_called_create_thread = -1;
-
-#ifdef _MSC_VER
-// On Windows, we need to create a h-b arc between
-// RtlQueueWorkItem(callback, x, y) and the call to callback.
-// Same for RegisterWaitForSingleObject.
-static unordered_set<uintptr_t> *g_windows_thread_pool_calback_set;
-// Similarly, we need h-b arcs between the returns from callbacks and
-// thre related UnregisterWaitEx. Damn, what a stupid interface!
-static unordered_map<uintptr_t, uintptr_t> *g_windows_thread_pool_wait_object_map;
-#endif
-
-//--------------- StackFrame ----------------- {{{1
-struct StackFrame {
-  uintptr_t pc;
-  uintptr_t sp;
-  StackFrame(uintptr_t p, uintptr_t s) : pc(p), sp(s) { }
-};
-//--------------- InstrumentedCallFrame ----- {{{1
-// Machinery to implement the fast interceptors in PIN
-// (i.e. the ones that don't use PIN_CallApplicationFunction).
-// We instrument the entry of the interesting function (e.g. malloc)
-// and all RET instructions in this function's module (e.g. libc).
-// At entry, we push an InstrumentedCallFrame object onto InstrumentedCallStack.
-// At every RET instruction we check if the stack is not empty (fast path)
-// and if the top contains the current SP. If yes -- this is the function return
-// and we pop the stack.
-struct InstrumentedCallFrame {
-  typedef void (*callback_t)(THREADID tid, InstrumentedCallFrame &frame,
-                             ADDRINT ret);
-  callback_t callback;
-  uintptr_t pc;
-  uintptr_t sp;
-  uintptr_t arg[4];
-};
-
-struct InstrumentedCallStack {
- public:
-  InstrumentedCallStack() : size_(0) { }
-
-  size_t size() { return size_; }
-
-  void Push(InstrumentedCallFrame::callback_t callback,
-            uintptr_t pc,
-            uintptr_t sp,
-            uintptr_t a0, uintptr_t a1) {
-    CHECK(size() < TS_ARRAY_SIZE(frames_));
-    size_++;
-    Top()->callback = callback;
-    Top()->pc = pc;
-    Top()->sp = sp;
-    Top()->arg[0] = a0;
-    Top()->arg[1] = a1;
-  }
-
-  void Pop() {
-    CHECK(size() > 0);
-    size_--;
-  }
-
-  InstrumentedCallFrame *Top() {
-    CHECK(size() > 0);
-    return &frames_[size_-1];
-  }
-
-  void Print() {
-    for (size_t i = 0; i < size(); i++) {
-      Printf( " %p\n", frames_[i].sp);
-      if (i > 0) CHECK(frames_[i].sp <= frames_[i-1].sp);
-    }
-  }
-
- private:
-  InstrumentedCallFrame frames_[20];
-  size_t size_;
-};
-
-//--------------- PinThread ----------------- {{{1
-const size_t kThreadLocalEventBufferSize = 2048 - 2;
-// The number of mops should be at least 2 less than the size of TLEB
-// so that we have space to put SBLOCK_ENTER token and the trace_info ptr.
-const size_t kMaxMopsPerTrace = kThreadLocalEventBufferSize - 2;
-
-REG tls_reg;
-
-struct PinThread;
-
-struct ThreadLocalEventBuffer {
-  PinThread *t;
-  size_t size;
-  uintptr_t events[kThreadLocalEventBufferSize];
-};
-
-struct PinThread {
-  ThreadLocalEventBuffer tleb;
-  int          uniq_tid;
-  uint32_t     literace_sampling;  // cache of a flag.
-  volatile long last_child_tid;
-  InstrumentedCallStack ic_stack;
-  THREADID     tid;
-  THREADID     parent_tid;
-  pthread_t    my_ptid;
-  size_t       thread_stack_size_if_known;
-  size_t       last_child_stack_size_if_known;
-  vector<StackFrame> shadow_stack;
-  TraceInfo    *trace_info;
-  int ignore_accesses;  // if > 0, ignore all memory accesses.
-  int ignore_accesses_depth;
-  int ignore_sync;      // if > 0, ignore all sync events.
-  int spin_lock_recursion_depth;
-  bool         thread_finished;
-  bool         thread_done;
-  bool         holding_lock;
-  int          n_consumed_events;
-#ifdef _MSC_VER
-  enum StartupState {
-    STARTING,
-    CHILD_READY,
-    MAY_CONTINUE,
-  };
-  volatile long startup_state;  // used to handle the CREATE_SUSPENDED flag.
-#endif
-  char         padding[64];  // avoid any chance of ping-pong.
-};
-
-// Array of pin threads, indexed by pin's THREADID.
-static PinThread *g_pin_threads;
-
-// If true, ignore all accesses in all threads.
-extern bool global_ignore;
-
-#ifdef _MSC_VER
-static unordered_set<pthread_t> *g_win_handles_which_are_threads;
-#endif
-
-//-------------------- ts_replace ------------------- {{{1
-static void ReportAccesRange(THREADID tid, uintptr_t pc, EventType type, uintptr_t x, size_t size) {
-  if (size && !g_pin_threads[tid].ignore_accesses) {
-    uintptr_t end = x + size;
-    for(uintptr_t a = x; a < end; a += 8) {
-      size_t cur_size = min((uintptr_t)8, end - a);
-      DumpEvent(0, type, tid, pc, a, cur_size);
-    }
-  }
-}
-
-#define REPORT_READ_RANGE(x, size) ReportAccesRange(tid, pc, READ, (uintptr_t)x, size)
-#define REPORT_WRITE_RANGE(x, size) ReportAccesRange(tid, pc, WRITE, (uintptr_t)x, size)
-
-#define EXTRA_REPLACE_PARAMS THREADID tid, uintptr_t pc,
-#define EXTRA_REPLACE_ARGS tid, pc,
-#include "ts_replace.h"
-
-//------------- ThreadSanitizer exports ------------ {{{1
-string Demangle(const char *str) {
-#if defined(__GNUC__)
-  int status;
-  char *demangled = __cxxabiv1::__cxa_demangle(str, 0, 0, &status);
-  if (demangled) {
-    string res = demangled;
-    free(demangled);
-    return res;
-  }
-#endif
-  return str;
-}
-
-void PcToStrings(uintptr_t pc, bool demangle,
-                string *img_name, string *rtn_name,
-                string *file_name, int *line_no) {
-  if (G_flags->symbolize) {
-    RTN rtn;
-    ScopedReentrantClientLock lock(__LINE__);
-    // ClientLock must be held.
-    PIN_GetSourceLocation(pc, NULL, line_no, file_name);
-    *file_name = ConvertToPlatformIndependentPath(*file_name);
-    rtn = RTN_FindByAddress(pc);
-    string name;
-    if (RTN_Valid(rtn)) {
-      *rtn_name = demangle
-          ? Demangle(RTN_Name(rtn).c_str())
-          : RTN_Name(rtn);
-      *img_name = IMG_Name(SEC_Img(RTN_Sec(rtn)));
-    }
-  }
-}
-
-string PcToRtnName(uintptr_t pc, bool demangle) {
-  string res;
-  if (G_flags->symbolize) {
-    {
-      ScopedReentrantClientLock lock(__LINE__);
-      RTN rtn = RTN_FindByAddress(pc);
-      if (RTN_Valid(rtn)) {
-        res = demangle
-            ? Demangle(RTN_Name(rtn).c_str())
-            : RTN_Name(rtn);
-      }
-    }
-  }
-  return res;
-}
-
-//--------------- ThreadLocalEventBuffer ----------------- {{{1
-// thread local event buffer is an array of uintptr_t.
-// The events are encoded like this:
-// { RTN_CALL, call_pc, target_pc }
-// { RTN_EXIT }
-// { SBLOCK_ENTER, trace_info_of_size_n, addr1, addr2, ... addr_n}
-
-enum TLEBSpecificEvents {
-  TLEB_IGNORE_ALL_BEGIN = LAST_EVENT + 1,
-  TLEB_IGNORE_ALL_END,
-  TLEB_IGNORE_SYNC_BEGIN,
-  TLEB_IGNORE_SYNC_END,
-  TLEB_GLOBAL_IGNORE_ON,
-  TLEB_GLOBAL_IGNORE_OFF,
-};
-
-static bool DumpEventPlainText(EventType type, int32_t tid, uintptr_t pc,
-                        uintptr_t a, uintptr_t info) {
-#if DEBUG == 0 || defined(_MSC_VER)
-  return false;
-#else
-  if (G_flags->dump_events.empty()) return false;
-
-  static unordered_set<uintptr_t> *pc_set;
-  if (pc_set == NULL) {
-    pc_set = new unordered_set<uintptr_t>;
-  }
-  static FILE *log_file = NULL;
-  if (log_file == NULL) {
-    log_file = popen(("gzip > " + G_flags->dump_events).c_str(), "w");
-  }
-  if (G_flags->symbolize && pc_set->insert(pc).second) {
-    string img_name, rtn_name, file_name;
-    int line = 0;
-    PcToStrings(pc, false, &img_name, &rtn_name, &file_name, &line);
-    if (file_name.empty()) file_name = "unknown";
-    if (img_name.empty()) img_name = "unknown";
-    if (rtn_name.empty()) rtn_name = "unknown";
-    if (line == 0) line = 1;
-    fprintf(log_file, "#PC %lx %s %s %s %d\n",
-            (long)pc, img_name.c_str(), rtn_name.c_str(),
-            file_name.c_str(), line);
-  }
-  fprintf(log_file, "%s %x %lx %lx %lx\n", kEventNames[type], tid,
-          (long)pc, (long)a, (long)info);
-  return true;
-#endif
-}
-
-static void DumpEventInternal(EventType type, int32_t uniq_tid, uintptr_t pc,
-                              uintptr_t a, uintptr_t info) {
-  if (DumpEventPlainText(type, uniq_tid, pc, a, info)) return;
-  // PIN wraps the tid (after 2048), but we need a uniq tid.
-  Event event(type, uniq_tid, pc, a, info);
-  ThreadSanitizerHandleOneEvent(&event);
-}
-
-void ComputeIgnoreAccesses(PinThread &t) {
-  t.ignore_accesses = (t.ignore_accesses_depth != 0) || (global_ignore != 0);
-}
-
-static void HandleInnerEvent(PinThread &t, uintptr_t event) {
-  DCHECK(event > LAST_EVENT);
-  if (event == TLEB_IGNORE_ALL_BEGIN){
-    t.ignore_accesses_depth++;
-    ComputeIgnoreAccesses(t);
-  } else if (event == TLEB_IGNORE_ALL_END){
-    t.ignore_accesses_depth--;
-    CHECK(t.ignore_accesses_depth >= 0);
-    ComputeIgnoreAccesses(t);
-  } else if (event == TLEB_IGNORE_SYNC_BEGIN){
-    t.ignore_sync++;
-  } else if (event == TLEB_IGNORE_SYNC_END){
-    t.ignore_sync--;
-    CHECK(t.ignore_sync >= 0);
-  } else if (event == TLEB_GLOBAL_IGNORE_ON){
-    Report("INFO: GLOBAL IGNORE ON\n");
-    global_ignore = true;
-    ComputeIgnoreAccesses(t);
-  } else if (event == TLEB_GLOBAL_IGNORE_OFF){
-    Report("INFO: GLOBAL IGNORE OFF\n");
-    global_ignore = false;
-    ComputeIgnoreAccesses(t);
-  } else {
-    Printf("Event: %ld (last: %ld)\n", event, LAST_EVENT);
-    CHECK(0);
-  }
-}
-
-static INLINE bool WantToIgnoreEvent(PinThread &t, uintptr_t event) {
-  if (t.ignore_sync &&
-      (event == WRITER_LOCK || event == READER_LOCK || event == UNLOCK ||
-       event == SIGNAL || event == WAIT)) {
-    // do nothing, we are ignoring locks.
-    return true;
-  } else if (t.ignore_accesses && (event == READ || event == WRITE)) {
-    // do nothing, we are ignoring mops.
-    return true;
-  }
-  return false;
-}
-
-static INLINE void TLEBFlushUnlocked(ThreadLocalEventBuffer &tleb) {
-  if (tleb.size == 0) return;
-  PinThread &t = *tleb.t;
-  // global_ignore should be always on with race verifier
-  DCHECK(!g_race_verifier_active || global_ignore);
-  DCHECK(tleb.size <= kThreadLocalEventBufferSize);
-  if (DEBUG_MODE && t.thread_done) {
-    Printf("ACHTUNG!!! an event from a dead thread T%d\n", t.tid);
-  }
-  DCHECK(!t.thread_done);
-
-  if (TS_SERIALIZED == 1 || DEBUG_MODE) {
-    size_t max_idx = TS_ARRAY_SIZE(G_stats->tleb_flush);
-    size_t idx = min((size_t)u32_log2(tleb.size), max_idx - 1);
-    CHECK(idx < max_idx);
-    G_stats->tleb_flush[idx]++;
-  }
-
-  if (TS_SERIALIZED == 1 && G_flags->offline) {
-    fwrite(tleb.events, sizeof(uintptr_t), tleb.size, G_out);
-    tleb.size = 0;
-    return;
-  }
-
-  size_t i;
-  for (i = 0; i < tleb.size; ) {
-    uintptr_t event = tleb.events[i++];
-    DCHECK(!g_race_verifier_active ||
-        event == SBLOCK_ENTER || event == EXPECT_RACE || event == THR_START);
-    if (event == RTN_EXIT) {
-      if (DumpEventPlainText(RTN_EXIT, t.uniq_tid, 0, 0, 0)) continue;
-      ThreadSanitizerHandleRtnExit(t.uniq_tid);
-    } else if (event == RTN_CALL) {
-      uintptr_t call_pc = tleb.events[i++];
-      uintptr_t target_pc = tleb.events[i++];
-      IGNORE_BELOW_RTN ignore_below = (IGNORE_BELOW_RTN)tleb.events[i++];
-      if (DumpEventPlainText(RTN_CALL, t.uniq_tid, call_pc,
-                             target_pc, ignore_below)) continue;
-      ThreadSanitizerHandleRtnCall(t.uniq_tid, call_pc, target_pc,
-                                   ignore_below);
-    } else if (event == SBLOCK_ENTER){
-      TraceInfo *trace_info = (TraceInfo*) tleb.events[i++];
-      DCHECK(trace_info);
-      bool do_this_trace = true;
-      if (t.ignore_accesses) {
-        do_this_trace = false;
-      } else if (t.literace_sampling) {
-        do_this_trace = !trace_info->LiteRaceSkipTraceRealTid(
-            t.uniq_tid, t.literace_sampling);
-      }
-
-      size_t n = trace_info->n_mops();
-      if (do_this_trace) {
-        if (DEBUG_MODE && !G_flags->dump_events.empty()) {
-          DumpEventPlainText(SBLOCK_ENTER, t.uniq_tid, trace_info->pc(), 0, 0);
-          for (size_t j = 0; j < n; j++) {
-            MopInfo *mop = trace_info->GetMop(j);
-            DCHECK(mop->size());
-            DCHECK(mop);
-            uintptr_t addr = tleb.events[i + j];
-            if (addr) {
-              DumpEventPlainText(mop->is_write() ? WRITE : READ, t.uniq_tid,
-                                     mop->pc(), addr, mop->size());
-            }
-          }
-        } else {
-          ThreadSanitizerHandleTrace(t.uniq_tid, trace_info, tleb.events+i);
-        }
-      }
-      i += n;
-    } else if (event == THR_START) {
-      uintptr_t parent = -1;
-      if (t.parent_tid != (THREADID)-1) {
-        parent = g_pin_threads[t.parent_tid].uniq_tid;
-      }
-      DumpEventInternal(THR_START, t.uniq_tid, 0, 0, parent);
-    } else if (event == THR_END) {
-      DumpEventInternal(THR_END, t.uniq_tid, 0, 0, 0);
-      DCHECK(t.thread_finished == true);
-      DCHECK(t.thread_done == false);
-      t.thread_done = true;
-      i += 3;  // consume the unneeded data.
-      DCHECK(i == tleb.size);  // should be last event in this tleb.
-    } else if (event > LAST_EVENT) {
-      HandleInnerEvent(t, event);
-    } else {
-      // all other events.
-      CHECK(event > NOOP && event < LAST_EVENT);
-      uintptr_t pc    = tleb.events[i++];
-      uintptr_t a     = tleb.events[i++];
-      uintptr_t info  = tleb.events[i++];
-      if (!WantToIgnoreEvent(t, event)) {
-        DumpEventInternal((EventType)event, t.uniq_tid, pc, a, info);
-      }
-    }
-  }
-  DCHECK(i == tleb.size);
-  tleb.size = 0;
-  if (DEBUG_MODE) { // for sanity checking.
-    memset(tleb.events, 0xf0, sizeof(tleb.events));
-  }
-}
-
-static INLINE void TLEBFlushLocked(PinThread &t) {
-#if TS_SERIALIZED==1
-  if (G_flags->dry_run) {
-    t.tleb.size = 0;
-    return;
-  }
-  CHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-  G_stats->lock_sites[0]++;
-  ScopedLock lock(&g_main_ts_lock);
-  TLEBFlushUnlocked(t.tleb);
-#else
-  TLEBFlushUnlocked(t.tleb);
-#endif
-}
-
-static void TLEBAddRtnCall(PinThread &t, uintptr_t call_pc,
-                           uintptr_t target_pc, IGNORE_BELOW_RTN ignore_below) {
-  if (TS_SERIALIZED == 0) {
-    TLEBFlushLocked(t);
-    ThreadSanitizerHandleRtnCall(t.uniq_tid, call_pc, target_pc,
-                                 ignore_below);
-    return;
-  }
-  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-  if (t.tleb.size + 4 > kThreadLocalEventBufferSize) {
-    TLEBFlushLocked(t);
-    DCHECK(t.tleb.size == 0);
-  }
-  t.tleb.events[t.tleb.size++] = RTN_CALL;
-  t.tleb.events[t.tleb.size++] = call_pc;
-  t.tleb.events[t.tleb.size++] = target_pc;
-  t.tleb.events[t.tleb.size++] = ignore_below;
-  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-}
-
-static void TLEBAddRtnExit(PinThread &t) {
-  if (TS_SERIALIZED == 0) {
-    TLEBFlushLocked(t);
-    ThreadSanitizerHandleRtnExit(t.uniq_tid);
-    return;
-  }
-  if (t.tleb.size + 1 > kThreadLocalEventBufferSize) {
-    TLEBFlushLocked(t);
-  }
-  t.tleb.events[t.tleb.size++] = RTN_EXIT;
-  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-}
-
-static INLINE uintptr_t *TLEBAddTrace(PinThread &t) {
-  size_t n = t.trace_info->n_mops();
-  DCHECK(n > 0);
-  if (TS_SERIALIZED == 0) {
-    TLEBFlushLocked(t);
-  } else if (t.tleb.size + 2 + n > kThreadLocalEventBufferSize) {
-    TLEBFlushLocked(t);
-  }
-  if (TS_SERIALIZED == 1) {
-    t.tleb.events[t.tleb.size++] = SBLOCK_ENTER;
-    t.tleb.events[t.tleb.size++] = (uintptr_t)t.trace_info;
-  } else {
-    DCHECK(t.tleb.size == 0);
-    t.tleb.events[0] = SBLOCK_ENTER;
-    t.tleb.events[1] = (uintptr_t)t.trace_info;
-    t.tleb.size += 2;
-  }
-  uintptr_t *mop_addresses = &t.tleb.events[t.tleb.size];
-  // not every address will be written to. so they will stay 0.
-  for (size_t i = 0; i < n; i++) {
-    mop_addresses[i] = 0;
-  }
-  t.tleb.size += n;
-  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-  return mop_addresses;
-}
-
-static void TLEBStartThread(PinThread &t) {
-  CHECK(t.tleb.size == 0);
-  t.tleb.events[t.tleb.size++] = THR_START;
-}
-
-static void TLEBSimpleEvent(PinThread &t, uintptr_t event) {
-  if (g_race_verifier_active)
-    return;
-  if (TS_SERIALIZED == 0) {
-    TLEBFlushLocked(t);
-    if (event < LAST_EVENT) {
-      Event e((EventType)event, t.uniq_tid, 0, 0, 0);
-      ThreadSanitizerHandleOneEvent(&e);
-    } else {
-      HandleInnerEvent(t, event);
-    }
-    return;
-  }
-  if (t.tleb.size + 1 > kThreadLocalEventBufferSize) {
-    TLEBFlushLocked(t);
-  }
-  t.tleb.events[t.tleb.size++] = event;
-  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-}
-
-static void TLEBAddGenericEventAndFlush(PinThread &t,
-                                        EventType type, uintptr_t pc,
-                                        uintptr_t a, uintptr_t info) {
-  if (TS_SERIALIZED == 0) {
-    if (WantToIgnoreEvent(t, type)) return;
-    TLEBFlushLocked(t);
-    Event e(type, t.uniq_tid, pc, a, info);
-    ThreadSanitizerHandleOneEvent(&e);
-    return;
-  }
-  if (t.tleb.size + 4 > kThreadLocalEventBufferSize) {
-    TLEBFlushLocked(t);
-  }
-  DCHECK(type > NOOP && type < LAST_EVENT);
-  t.tleb.events[t.tleb.size++] = type;
-  t.tleb.events[t.tleb.size++] = pc;
-  t.tleb.events[t.tleb.size++] = a;
-  t.tleb.events[t.tleb.size++] = info;
-  TLEBFlushLocked(t);
-  DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
-}
-
-static void UpdateCallStack(PinThread &t, ADDRINT sp);
-
-// Must be called from its thread (except for THR_END case)!
-static void DumpEventWithSp(uintptr_t sp, EventType type, int32_t tid, uintptr_t pc,
-                            uintptr_t a, uintptr_t info) {
-  if (!g_race_verifier_active || type == EXPECT_RACE) {
-    PinThread &t = g_pin_threads[tid];
-    if (sp) {
-      UpdateCallStack(t, sp);
-    }
-    TLEBAddGenericEventAndFlush(t, type, pc, a, info);
-  }
-}
-static void DumpEvent(CONTEXT *ctx, EventType type, int32_t tid, uintptr_t pc,
-                      uintptr_t a, uintptr_t info) {
-  DumpEventWithSp(ctx ? PIN_GetContextReg(ctx, REG_STACK_PTR) : 0,
-            type, tid, pc, a, info);
-}
-
-//--------- Wraping and relacing --------------- {{{1
-static set<string> g_wrapped_functions;
-static void InformAboutFunctionWrap(RTN rtn, string name) {
-  g_wrapped_functions.insert(name);
-  if (!debug_wrap) return;
-  Printf("Function wrapped: %s (%s %s)\n", name.c_str(),
-         RTN_Name(rtn).c_str(), IMG_Name(SEC_Img(RTN_Sec(rtn))).c_str());
-}
-
-static bool RtnMatchesName(const string &rtn_name, const string &name) {
-  CHECK(name.size() > 0);
-  size_t pos = rtn_name.find(name);
-  if (pos == string::npos) {
-    return false;
-  }
-  if (pos == 0 && name.size() == rtn_name.size()) {
-  //  Printf("Full match: %s %s\n", rtn_name.c_str(), name.c_str());
-    return true;
-  }
-  // match MyFuncName@123
-  if (pos == 0 && name.size() < rtn_name.size()
-      && rtn_name[name.size()] == '@') {
-  //  Printf("Versioned match: %s %s\n", rtn_name.c_str(), name.c_str());
-    return true;
-  }
-  // match _MyFuncName@123
-  if (pos == 1 && rtn_name[0] == '_' && name.size() < rtn_name.size()
-      && rtn_name[name.size() + 1] == '@') {
-    // Printf("Versioned match: %s %s\n", rtn_name.c_str(), name.c_str());
-    return true;
-  }
-
-  return false;
-}
-
-#define FAST_WRAP_PARAM0 THREADID tid, ADDRINT pc, ADDRINT sp
-#define FAST_WRAP_PARAM1 FAST_WRAP_PARAM0, ADDRINT arg0
-#define FAST_WRAP_PARAM2 FAST_WRAP_PARAM1, ADDRINT arg1
-#define FAST_WRAP_PARAM3 FAST_WRAP_PARAM2, ADDRINT arg2
-
-#define FAST_WRAP_PARAM_AFTER \
-  THREADID tid, InstrumentedCallFrame &frame, ADDRINT ret
-
-
-#define DEBUG_FAST_INTERCEPTORS 0
-//#define DEBUG_FAST_INTERCEPTORS (tid == 1)
-
-#define PUSH_AFTER_CALLBACK1(callback, a0) \
-  g_pin_threads[tid].ic_stack.Push(callback, pc, sp, a0, 0); \
-  if (DEBUG_FAST_INTERCEPTORS) \
-    Printf("T%d %s pc=%p sp=%p *sp=(%p) arg0=%p stack_size=%ld\n",\
-         tid, __FUNCTION__, pc, sp,\
-         ((void**)sp)[0],\
-         arg0,\
-         g_pin_threads[tid].ic_stack.size()\
-         );\
-
-
-#define WRAP_NAME(name) Wrap_##name
-#define WRAP4(name) WrapFunc4(img, rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD1(name) WrapStdCallFunc1(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD2(name) WrapStdCallFunc2(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD3(name) WrapStdCallFunc3(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD4(name) WrapStdCallFunc4(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD5(name) WrapStdCallFunc5(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD6(name) WrapStdCallFunc6(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD7(name) WrapStdCallFunc7(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD8(name) WrapStdCallFunc8(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD10(name) WrapStdCallFunc10(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAPSTD11(name) WrapStdCallFunc11(rtn, #name, (AFUNPTR)Wrap_##name)
-#define WRAP_PARAM4  THREADID tid, ADDRINT pc, CONTEXT *ctx, \
-                                AFUNPTR f,\
-                                uintptr_t arg0, uintptr_t arg1, \
-                                uintptr_t arg2, uintptr_t arg3
-
-#define WRAP_PARAM6 WRAP_PARAM4, uintptr_t arg4, uintptr_t arg5
-#define WRAP_PARAM8 WRAP_PARAM6, uintptr_t arg6, uintptr_t arg7
-#define WRAP_PARAM10 WRAP_PARAM8, uintptr_t arg8, uintptr_t arg9
-#define WRAP_PARAM12 WRAP_PARAM10, uintptr_t arg10, uintptr_t arg11
-
-static uintptr_t CallFun4(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2, uintptr_t arg3) {
-  uintptr_t ret = 0xdeadbee1;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_DEFAULT, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG(uintptr_t), arg3,
-                              PIN_PARG_END());
-  return ret;
-}
-
-static uintptr_t CallFun6(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2, uintptr_t arg3,
-                         uintptr_t arg4, uintptr_t arg5) {
-  uintptr_t ret = 0xdeadbee1;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_DEFAULT, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG(uintptr_t), arg3,
-                              PIN_PARG(uintptr_t), arg4,
-                              PIN_PARG(uintptr_t), arg5,
-                              PIN_PARG_END());
-  return ret;
-}
-
-#define CALL_ME_INSIDE_WRAPPER_4() CallFun4(ctx, tid, f, arg0, arg1, arg2, arg3)
-#define CALL_ME_INSIDE_WRAPPER_6() CallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5)
-
-// Completely replace (i.e. not wrap) a function with 3 (or less) parameters.
-// The original function will not be called.
-void ReplaceFunc3(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_DEFAULT,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-// Wrap a function with up to 4 parameters.
-void WrapFunc4(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_DEFAULT,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-// Wrap a function with up to 6 parameters.
-void WrapFunc6(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_DEFAULT,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-
-//--------- Instrumentation callbacks --------------- {{{1
-//---------- Debug -----------------------------------{{{2
-#define DEB_PR (0)
-
-static void ShowPcAndSp(const char *where, THREADID tid,
-                        ADDRINT pc, ADDRINT sp) {
-    Printf("%s T%d sp=%ld pc=%p %s\n", where, tid, sp, pc,
-           PcToRtnName(pc, true).c_str());
-}
-
-static void PrintShadowStack(PinThread &t) {
-  Printf("T%d Shadow stack (%d)\n", t.tid, (int)t.shadow_stack.size());
-  for (int i = t.shadow_stack.size() - 1; i >= 0; i--) {
-    uintptr_t pc = t.shadow_stack[i].pc;
-    uintptr_t sp = t.shadow_stack[i].sp;
-    Printf("  sp=%ld pc=%lx %s\n", sp, pc, PcToRtnName(pc, true).c_str());
-  }
-}
-
-static void DebugOnlyShowPcAndSp(const char *where, THREADID tid,
-                                 ADDRINT pc, ADDRINT sp) {
-  if (DEB_PR) {
-    ShowPcAndSp(where, tid, pc, sp);
-  }
-}
-
-static uintptr_t WRAP_NAME(ThreadSanitizerQuery)(WRAP_PARAM4) {
-  const char *query = (const char*)arg0;
-  return (uintptr_t)ThreadSanitizerQuery(query);
-}
-
-//--------- Ignores -------------------------------- {{{2
-static void IgnoreMopsBegin(THREADID tid) {
-  // if (tid != 0) Printf("T%d IgnoreMops++\n", tid);
-  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_ALL_BEGIN);
-}
-static void IgnoreMopsEnd(THREADID tid) {
-  // if (tid != 0) Printf("T%d IgnoreMops--\n", tid);
-  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_ALL_END);
-}
-
-static void IgnoreSyncAndMopsBegin(THREADID tid) {
-  // if (tid != 0) Printf("T%d IgnoreSync++\n", tid);
-  IgnoreMopsBegin(tid);
-  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_SYNC_BEGIN);
-}
-static void IgnoreSyncAndMopsEnd(THREADID tid) {
-  // if (tid != 0) Printf("T%d IgnoreSync--\n", tid);
-  IgnoreMopsEnd(tid);
-  TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_SYNC_END);
-}
-
-//--------- __cxa_guard_* -------------------------- {{{2
-// From gcc/cp/decl.c:
-// --------------------------------------------------------------
-//      Emit code to perform this initialization but once.  This code
-//      looks like:
-//
-//      static <type> guard;
-//      if (!guard.first_byte) {
-//        if (__cxa_guard_acquire (&guard)) {
-//          bool flag = false;
-//          try {
-//            // Do initialization.
-//            flag = true; __cxa_guard_release (&guard);
-//            // Register variable for destruction at end of program.
-//           } catch {
-//          if (!flag) __cxa_guard_abort (&guard);
-//         }
-//      }
-// --------------------------------------------------------------
-// So, when __cxa_guard_acquire returns true, we start ignoring all accesses
-// and in __cxa_guard_release we stop ignoring them.
-// We also need to ignore all accesses inside these two functions.
-
-static void Before_cxa_guard_acquire(THREADID tid, ADDRINT pc, ADDRINT guard) {
-  IgnoreMopsBegin(tid);
-}
-
-static void After_cxa_guard_acquire(THREADID tid, ADDRINT pc, ADDRINT ret) {
-  if (ret) {
-    // Continue ignoring, it will end in __cxa_guard_release.
-  } else {
-    // Stop ignoring, there will be no matching call to __cxa_guard_release.
-    IgnoreMopsEnd(tid);
-  }
-}
-
-static void After_cxa_guard_release(THREADID tid, ADDRINT pc) {
-  IgnoreMopsEnd(tid);
-}
-
-static uintptr_t WRAP_NAME(pthread_once)(WRAP_PARAM4) {
-  uintptr_t ret;
-  IgnoreMopsBegin(tid);
-  ret = CALL_ME_INSIDE_WRAPPER_4();
-  IgnoreMopsEnd(tid);
-  return ret;
-}
-
-void TmpCallback1(THREADID tid, ADDRINT pc) {
-  Printf("%s T%d %lx\n", __FUNCTION__, tid, pc);
-}
-void TmpCallback2(THREADID tid, ADDRINT pc) {
-  Printf("%s T%d %lx\n", __FUNCTION__, tid, pc);
-}
-
-//--------- Threads --------------------------------- {{{2
-static void HandleThreadCreateBefore(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, THR_CREATE_BEFORE, tid, pc, 0, 0);
-  g_thread_create_lock.Lock();
-  IgnoreMopsBegin(tid);
-  CHECK(g_tid_of_thread_which_called_create_thread == (THREADID)-1);
-  g_tid_of_thread_which_called_create_thread = tid;
-  n_created_threads++;
-}
-
-static void HandleThreadCreateAbort(THREADID tid) {
-  CHECK(g_tid_of_thread_which_called_create_thread == tid);
-  g_tid_of_thread_which_called_create_thread = (THREADID)-1;
-  n_created_threads--;
-  IgnoreMopsEnd(tid);
-  g_thread_create_lock.Unlock();
-}
-
-static THREADID HandleThreadCreateAfter(THREADID tid, pthread_t child_ptid,
-                                        bool suspend_child) {
-  // Spin, waiting for last_child_tid to appear (i.e. wait for the thread to
-  // actually start) so that we know the child's tid. No locks.
-  while (!ATOMIC_READ(&g_pin_threads[tid].last_child_tid)) {
-    YIELD();
-  }
-
-  CHECK(g_tid_of_thread_which_called_create_thread == tid);
-  g_tid_of_thread_which_called_create_thread = -1;
-
-  THREADID last_child_tid = g_pin_threads[tid].last_child_tid;
-  CHECK(last_child_tid);
-
-  PinThread &child_t = g_pin_threads[last_child_tid];
-  child_t.my_ptid = child_ptid;
-
-#ifdef _MSC_VER
-  if (suspend_child) {
-    while (ATOMIC_READ(&child_t.startup_state) != PinThread::CHILD_READY) {
-      YIELD();
-    }
-    // Strictly speaking, PIN forbids calling system functions like this.
-    // This may violate application library isolation but
-    // a) YIELD == WINDOWS::Sleep, so we violate it anyways
-    // b) SuspendThread probably calls NtSuspendThread right away
-    WINDOWS::DWORD old_count = WINDOWS::SuspendThread((WINDOWS::HANDLE)child_ptid);  // TODO handle?
-    CHECK(old_count == 0);
-  }
-  child_t.startup_state = PinThread::MAY_CONTINUE;
-#else
-  CHECK(!suspend_child);  // Not implemented - do we need to?
-#endif
-
-  int uniq_tid_of_child = child_t.uniq_tid;
-  g_pin_threads[tid].last_child_tid = 0;
-
-  IgnoreMopsEnd(tid);
-  g_thread_create_lock.Unlock();
-
-  DumpEvent(0, THR_CREATE_AFTER, tid, 0, 0, uniq_tid_of_child);
-  return last_child_tid;
-}
-
-static uintptr_t WRAP_NAME(pthread_create)(WRAP_PARAM4) {
-  HandleThreadCreateBefore(tid, pc);
-
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret != 0) {
-    HandleThreadCreateAbort(tid);
-    return ret;
-  }
-
-  pthread_t child_ptid = *(pthread_t*)arg0;
-  HandleThreadCreateAfter(tid, child_ptid, false);
-
-  return ret;
-}
-
-void CallbackForThreadStart(THREADID tid, CONTEXT *ctxt,
-                            INT32 flags, void *v) {
-  // We can not rely on PIN_GetParentTid() since it is broken on Windows.
-
-  if (g_pin_threads == NULL) {
-    g_pin_threads = new PinThread[kMaxThreads];
-  }
-
-  bool has_parent = true;
-  if (tid == 0) {
-    // Main thread or we have attached to a running process.
-    has_parent = false;
-  } else {
-    CHECK(tid > 0);
-  }
-
-  CHECK(tid < kMaxThreads);
-  PinThread &t = g_pin_threads[tid];
-  memset(&t, 0, sizeof(PinThread));
-  t.uniq_tid = n_started_threads++;
-  t.literace_sampling = G_flags->literace_sampling;
-  t.tid = tid;
-  t.tleb.t = &t;
-#if defined(_MSC_VER)
-  t.startup_state = PinThread::STARTING;
-#endif
-  ComputeIgnoreAccesses(t);
-
-
-  PIN_SetContextReg(ctxt, tls_reg, (ADDRINT)&t.tleb.events[2]);
-
-  t.parent_tid = -1;
-  if (has_parent) {
-    t.parent_tid = g_tid_of_thread_which_called_create_thread;
-#if !defined(_MSC_VER)  // On Windows, threads may appear out of thin air.
-    CHECK(t.parent_tid != (THREADID)-1);
-#endif  // _MSC_VER
-  }
-
-  if (debug_thread) {
-    Printf("T%d ThreadStart parent=%d child=%d\n", tid, t.parent_tid, tid);
-  }
-
-  if (has_parent && t.parent_tid != (THREADID)-1) {
-    g_pin_threads[t.parent_tid].last_child_tid = tid;
-    t.thread_stack_size_if_known =
-        g_pin_threads[t.parent_tid].last_child_stack_size_if_known;
-  } else {
-#if defined(_MSC_VER)
-    t.startup_state = PinThread::MAY_CONTINUE;
-#endif
-  }
-
-  // This is a lock-free (thread local) operation.
-  TLEBStartThread(t);
-/* TODO(timurrrr): investigate and un-comment
-#ifdef _MSC_VER
-  // Ignore all mops & sync before the real thread code.
-  // See the corresponding IgnoreSyncAndMopsEnd in Before_BaseThreadInitThunk.
-  IgnoreSyncAndMopsBegin(tid);
-#endif
-*/
-}
-
-static void Before_start_thread(THREADID tid, ADDRINT pc, ADDRINT sp) {
-  PinThread &t = g_pin_threads[tid];
-  if (debug_thread) {
-    Printf("T%d Before_start_thread: sp=%p my_ptid=%p diff=%p\n",
-         tid, sp, t.my_ptid, t.my_ptid - sp);
-  }
-  // This is a rather scary hack, but I see no easy way to avoid it.
-  // On linux NPTL, the pthread_t structure is the same block of memory
-  // as the stack (and the tls?). Somewhere inside the pthread_t
-  // object lives the address of stackblock followed by its size
-  // (see nptl/descr.h).
-  // At the current point we may not know the value of pthread_t (my_ptid),
-  // but we do know the current sp, which is a bit less than my_ptid.
-  //
-  // address                        value
-  // ------------------------------------------------
-  // 0xffffffffffffffff:
-  //
-  // stackblock + stackblock_size:
-  // my_ptid:
-  //
-  //                                stackblock_size
-  //                                stackblock
-  //
-  // current_sp:
-  //
-  //
-  // stackblock:
-  //
-  // 0x0000000000000000:
-  // -------------------------------------------------
-  //
-  // So, we itrate from sp to the higher addresses (but just in case, not more
-  // than a few pages) trying to find a pair of values which looks like
-  // stackblock and stackblock_size. Oh well.
-  // Note that in valgrind we are able to get this info from
-  //  pthread_getattr_np (linux) or pthread_get_stackaddr_np (mac),
-  // but in PIN we can't call those (can we?).
-  uintptr_t prev = 0;
-  for (uintptr_t sp1 = sp; sp1 - sp < 0x2000;
-       sp1 += sizeof(uintptr_t)) {
-    uintptr_t val = *(uintptr_t*)sp1;
-    if (val == 0) continue;
-    if (prev &&
-        (prev & 0xfff) == 0 && // stack is page aligned
-        prev < sp &&           // min stack is < sp
-        prev + val > sp &&     // max stack is > sp
-        val >= (1 << 15) &&    // stack size is >= 32k
-        val <= 128 * (1 << 20) // stack size is hardly > 128M
-        ) {
-      if (debug_thread) {
-        Printf("T%d found stack: %p size=%p\n", tid, prev, val);
-      }
-      DumpEvent(0, THR_STACK_TOP, tid, pc, prev + val, val);
-      return;
-    }
-    prev = val;
-  }
-  // The hack above does not always works. (TODO(kcc)). Do something.
-  Printf("WARNING: ThreadSanitizerPin is guessing stack size for T%d\n", tid);
-  DumpEvent(0, THR_STACK_TOP, tid, pc, sp, t.thread_stack_size_if_known);
-}
-
-#ifdef _MSC_VER
-static uintptr_t WRAP_NAME(CreateThread)(WRAP_PARAM6) {
-  PinThread &t = g_pin_threads[tid];
-  t.last_child_stack_size_if_known = arg1 ? arg1 : 1024 * 1024;
-
-  HandleThreadCreateBefore(tid, pc);
-
-  // We can't start the thread suspended because we want to get its
-  // PIN thread ID before leaving CreateThread.
-  // So, we reset the CREATE_SUSPENDED flag and SuspendThread before any client
-  // code is executed in the HandleThreadCreateAfter if needed.
-  bool should_be_suspended = arg4 & CREATE_SUSPENDED;
-  arg4 &= ~CREATE_SUSPENDED;
-
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_6();
-  if (ret == NULL) {
-    HandleThreadCreateAbort(tid);
-    return ret;
-  }
-  pthread_t child_ptid = ret;
-  THREADID child_tid = HandleThreadCreateAfter(tid, child_ptid,
-                                               should_be_suspended);
-  {
-    ScopedReentrantClientLock lock(__LINE__);
-    if (g_win_handles_which_are_threads == NULL) {
-      g_win_handles_which_are_threads = new unordered_set<pthread_t>;
-    }
-    g_win_handles_which_are_threads->insert(child_ptid);
-  }
-  return ret;
-}
-
-static void Before_BaseThreadInitThunk(THREADID tid, ADDRINT pc, ADDRINT sp) {
-  PinThread &t = g_pin_threads[tid];
-  size_t stack_size = t.thread_stack_size_if_known;
-  // Printf("T%d %s %p %p\n", tid, __FUNCTION__, sp, stack_size);
-  /* TODO(timurrrr): investigate and uncomment
-  if (tid != 0) {
-    // Ignore all mops & sync before the real thread code.
-    // See the corresponding IgnoreSyncAndMopsBegin in CallbackForThreadStart.
-    IgnoreSyncAndMopsEnd(tid);
-    TLEBFlushLocked(t);
-    CHECK(t.ignore_sync == 0);
-    CHECK(t.ignore_accesses == 0);
-  }
-  */
-  DumpEvent(0, THR_STACK_TOP, tid, pc, sp, stack_size);
-
-#ifdef _MSC_VER
-  if (t.startup_state != PinThread::MAY_CONTINUE) {
-    CHECK(t.startup_state == PinThread::STARTING);
-    t.startup_state = PinThread::CHILD_READY;
-    while (ATOMIC_READ(&t.startup_state) != PinThread::MAY_CONTINUE) {
-      YIELD();
-    }
-    // Corresponds to SIGNAL from ResumeThread if the thread was suspended on
-    // start.
-    DumpEvent(0, WAIT, tid, pc, t.my_ptid, 0);
-  }
-#endif
-}
-
-static void Before_RtlExitUserThread(THREADID tid, ADDRINT pc) {
-  PinThread &t = g_pin_threads[tid];
-  if (t.tid != 0) {
-    // Once we started exiting the thread, ignore the locking events.
-    // This way we will avoid h-b arcs between unrelated threads.
-    // We also start ignoring all mops, otherwise we will get tons of race
-    // reports from the windows guts.
-    IgnoreSyncAndMopsBegin(tid);
-  }
-}
-#endif  // _MSC_VER
-
-void CallbackForThreadFini(THREADID tid, const CONTEXT *ctxt,
-                          INT32 code, void *v) {
-  PinThread &t = g_pin_threads[tid];
-  t.thread_finished = true;
-  // We can not DumpEvent here,
-  // due to possible deadlock with PIN's internal lock.
-  if (debug_thread) {
-    Printf("T%d Thread finished (ptid=%d)\n", tid, t.my_ptid);
-  }
-}
-
-static bool HandleThreadJoinAfter(THREADID tid, pthread_t joined_ptid) {
-  THREADID joined_tid = kMaxThreads;
-  int max_uniq_tid_found = -1;
-
-  // TODO(timurrrr): walking through g_pin_threads may be slow.
-  // Do we need to/Can we optimize it?
-  for (THREADID j = 1; j < kMaxThreads; j++) {
-    if (g_pin_threads[j].thread_finished == false)
-      continue;
-    if (g_pin_threads[j].my_ptid == joined_ptid) {
-      // We search for the thread with the maximum uniq_tid to work around
-      // thread HANDLE reuse issues.
-      if (max_uniq_tid_found < g_pin_threads[j].uniq_tid) {
-        max_uniq_tid_found = g_pin_threads[j].uniq_tid;
-        joined_tid = j;
-      }
-    }
-  }
-  if (joined_tid == kMaxThreads) {
-    // This may happen in the following case:
-    //  - A non-joinable thread is created and a handle is assigned to it.
-    //  - Since the thread is non-joinable, the handle is then reused
-    //  for some other purpose, e.g. for a WaitableEvent.
-    //  - We did not yet register the thread fini event.
-    //  - We observe WaitForSingleObjectEx(ptid) and think that this is thread
-    //  join event, while it is not.
-    if (debug_thread)
-      Printf("T%d JoinAfter returns false! ptid=%d\n", tid, joined_ptid);
-    return false;
-  }
-  CHECK(joined_tid < kMaxThreads);
-  CHECK(joined_tid > 0);
-  g_pin_threads[joined_tid].my_ptid = 0;
-  int joined_uniq_tid = g_pin_threads[joined_tid].uniq_tid;
-
-  if (debug_thread) {
-    Printf("T%d JoinAfter   parent=%d child=%d (uniq=%d)\n", tid, tid,
-           joined_tid, joined_uniq_tid);
-  }
-
-  // Here we send an event for a different thread (joined_tid), which is already
-  // dead.
-  DumpEvent(0, THR_END, joined_tid, 0, 0, 0);
-
-
-  DumpEvent(0, THR_JOIN_AFTER, tid, 0, joined_uniq_tid, 0);
-  return true;
-}
-
-static uintptr_t WRAP_NAME(pthread_join)(WRAP_PARAM4) {
-  if (G_flags->debug_level >= 2)
-    Printf("T%d in  pthread_join %p\n", tid, arg0);
-  pthread_t joined_ptid = (pthread_t)arg0;
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  HandleThreadJoinAfter(tid, joined_ptid);
-  if (G_flags->debug_level >= 2)
-    Printf("T%d out pthread_join %p\n", tid, arg0);
-  return ret;
-}
-
-static size_t WRAP_NAME(fwrite)(WRAP_PARAM4) {
-  void* p = (void*)arg0;
-  size_t size = (size_t)arg1 * (size_t)arg2;
-  REPORT_READ_RANGE(p, size);
-  IgnoreMopsBegin(tid);
-  size_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  IgnoreMopsEnd(tid);
-  return ret;
-}
-
-#ifdef _MSC_VER
-
-
-uintptr_t CallStdCallFun1(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0) {
-  uintptr_t ret = 0xdeadbee1;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t CallStdCallFun2(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1) {
-  uintptr_t ret = 0xdeadbee2;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t CallStdCallFun3(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2) {
-  uintptr_t ret = 0xdeadbee3;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t CallStdCallFun4(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2, uintptr_t arg3) {
-  uintptr_t ret = 0xdeadbee4;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG(uintptr_t), arg3,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t CallStdCallFun5(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2, uintptr_t arg3,
-                         uintptr_t arg4) {
-  uintptr_t ret = 0xdeadbee5;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG(uintptr_t), arg3,
-                              PIN_PARG(uintptr_t), arg4,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t CallStdCallFun6(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2, uintptr_t arg3,
-                         uintptr_t arg4, uintptr_t arg5) {
-  uintptr_t ret = 0xdeadbee6;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG(uintptr_t), arg3,
-                              PIN_PARG(uintptr_t), arg4,
-                              PIN_PARG(uintptr_t), arg5,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t CallStdCallFun7(CONTEXT *ctx, THREADID tid,
-                         AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
-                         uintptr_t arg2, uintptr_t arg3,
-                         uintptr_t arg4, uintptr_t arg5,
-                         uintptr_t arg6) {
-  uintptr_t ret = 0xdeadbee7;
-  PIN_CallApplicationFunction(ctx, tid,
-                              CALLINGSTD_STDCALL, (AFUNPTR)(f),
-                              PIN_PARG(uintptr_t), &ret,
-                              PIN_PARG(uintptr_t), arg0,
-                              PIN_PARG(uintptr_t), arg1,
-                              PIN_PARG(uintptr_t), arg2,
-                              PIN_PARG(uintptr_t), arg3,
-                              PIN_PARG(uintptr_t), arg4,
-                              PIN_PARG(uintptr_t), arg5,
-                              PIN_PARG(uintptr_t), arg6,
-                              PIN_PARG_END());
-  return ret;
-}
-
-uintptr_t WRAP_NAME(ResumeThread)(WRAP_PARAM4) {
-//  Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
-  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlInitializeCriticalSection)(WRAP_PARAM4) {
-//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
-  IgnoreSyncAndMopsBegin(tid);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  IgnoreSyncAndMopsEnd(tid);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlInitializeCriticalSectionAndSpinCount)(WRAP_PARAM4) {
-//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
-  IgnoreSyncAndMopsBegin(tid);
-  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
-  IgnoreSyncAndMopsEnd(tid);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlInitializeCriticalSectionEx)(WRAP_PARAM4) {
-//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
-  IgnoreSyncAndMopsBegin(tid);
-  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-  IgnoreSyncAndMopsEnd(tid);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlDeleteCriticalSection)(WRAP_PARAM4) {
-//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  DumpEvent(ctx, LOCK_DESTROY, tid, pc, arg0, 0);
-  IgnoreSyncAndMopsBegin(tid);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  IgnoreSyncAndMopsEnd(tid);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlEnterCriticalSection)(WRAP_PARAM4) {
-//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlTryEnterCriticalSection)(WRAP_PARAM4) {
-  // Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+5, arg0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  if (ret) {
-    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  }
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlLeaveCriticalSection)(WRAP_PARAM4) {
-//  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
-  return CallStdCallFun1(ctx, tid, f, arg0);
-}
-
-uintptr_t WRAP_NAME(DuplicateHandle)(WRAP_PARAM8) {
-  Printf("WARNING: DuplicateHandle called for handle 0x%X.\n", arg1);
-  Printf("Future events on this handle may be processed incorrectly.\n");
-  return CallStdCallFun7(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5, arg6);
-}
-
-uintptr_t WRAP_NAME(SetEvent)(WRAP_PARAM4) {
-  //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
-  return ret;
-}
-
-uintptr_t InternalWrapCreateSemaphore(WRAP_PARAM4) {
-  if (arg3 != NULL) {
-    Printf("WARNING: CreateSemaphore called with lpName='%s'.\n", arg3);
-    Printf("Future events on this semaphore may be processed incorrectly "
-           "if it is reused.\n");
-  }
-  return CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
-}
-
-uintptr_t WRAP_NAME(CreateSemaphoreA)(WRAP_PARAM4) {
-  return InternalWrapCreateSemaphore(tid, pc, ctx, f, arg0, arg1, arg2, arg3);
-}
-
-uintptr_t WRAP_NAME(CreateSemaphoreW)(WRAP_PARAM4) {
-  return InternalWrapCreateSemaphore(tid, pc, ctx, f, arg0, arg1, arg2, arg3);
-}
-
-uintptr_t WRAP_NAME(ReleaseSemaphore)(WRAP_PARAM4) {
-  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
-  return CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-}
-
-uintptr_t WRAP_NAME(RtlInterlockedPushEntrySList)(WRAP_PARAM4) {
-  DumpEvent(ctx, SIGNAL, tid, pc, arg1, 0);
-  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
-  // Printf("T%d %s list=%p item=%p\n", tid, __FUNCTION__, arg0, arg1);
-  return ret;
-}
-
-uintptr_t WRAP_NAME(RtlInterlockedPopEntrySList)(WRAP_PARAM4) {
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  // Printf("T%d %s list=%p item=%p\n", tid, __FUNCTION__, arg0, ret);
-  if (ret) {
-    DumpEvent(ctx, WAIT, tid, pc, ret, 0);
-  }
-  return ret;
-}
-
-uintptr_t WRAP_NAME(RtlAcquireSRWLockExclusive)(WRAP_PARAM4) {
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlAcquireSRWLockShared)(WRAP_PARAM4) {
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlTryAcquireSRWLockExclusive)(WRAP_PARAM4) {
-  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  if (ret & 0xFF) {  // Looks like this syscall return value is just 1 byte.
-    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  }
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlTryAcquireSRWLockShared)(WRAP_PARAM4) {
-  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  if (ret & 0xFF) {  // Looks like this syscall return value is just 1 byte.
-    DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
-  }
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlReleaseSRWLockExclusive)(WRAP_PARAM4) {
-  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
-  DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlReleaseSRWLockShared)(WRAP_PARAM4) {
-  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
-  DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlInitializeSRWLock)(WRAP_PARAM4) {
-  // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
-  DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  return ret;
-}
-
-uintptr_t WRAP_NAME(RtlWakeConditionVariable)(WRAP_PARAM4) {
-  // Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
-  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlWakeAllConditionVariable)(WRAP_PARAM4) {
-  // Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
-  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlSleepConditionVariableSRW)(WRAP_PARAM4) {
-  // No need to unlock/lock - looks like RtlSleepConditionVariableSRW performs
-  // Rtl{Acquire,Release}SRW... calls itself!
-  uintptr_t ret = CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
-  if ((ret & 0xFF) == 0)
-    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  // Printf("T%d %s arg0=%p arg1=%p; ret=%d\n", tid, __FUNCTION__, arg0, arg1, ret);
-  return ret;
-}
-uintptr_t WRAP_NAME(RtlSleepConditionVariableCS)(WRAP_PARAM4) {
-  // TODO(timurrrr): do we need unlock/lock?
-  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-  if ((ret & 0xFF) == 0)
-    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  // Printf("T%d %s arg0=%p arg1=%p; ret=%d\n", tid, __FUNCTION__, arg0, arg1, ret);
-  return ret;
-}
-
-uintptr_t WRAP_NAME(RtlQueueWorkItem)(WRAP_PARAM4) {
-  // Printf("T%d %s arg0=%p arg1=%p; arg2=%d\n", tid, __FUNCTION__, arg0, arg1, arg2);
-  g_windows_thread_pool_calback_set->insert(arg0);
-  DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
-  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-  return ret;
-}
-
-uintptr_t WRAP_NAME(RegisterWaitForSingleObject)(WRAP_PARAM6) {
-  // Printf("T%d %s arg0=%p arg2=%p\n", tid, __FUNCTION__, arg0, arg2);
-  g_windows_thread_pool_calback_set->insert(arg2);
-  DumpEvent(ctx, SIGNAL, tid, pc, arg2, 0);
-  uintptr_t ret = CallStdCallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5);
-  if (ret) {
-    uintptr_t wait_object = *(uintptr_t*)arg0;
-    (*g_windows_thread_pool_wait_object_map)[wait_object] = arg2;
-    // Printf("T%d %s *arg0=%p\n", tid, __FUNCTION__, wait_object);
-  }
-  return ret;
-}
-
-uintptr_t WRAP_NAME(UnregisterWaitEx)(WRAP_PARAM4) {
-  CHECK(g_windows_thread_pool_wait_object_map);
-  uintptr_t obj = (*g_windows_thread_pool_wait_object_map)[arg0];
-  // Printf("T%d %s arg0=%p obj=%p\n", tid, __FUNCTION__, arg0, obj);
-  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
-  if (ret) {
-    DumpEvent(ctx, WAIT, tid, pc, obj, 0);
-  }
-  return ret;
-}
-
-uintptr_t WRAP_NAME(VirtualAlloc)(WRAP_PARAM4) {
-  // Printf("T%d VirtualAlloc: %p %p %p %p\n", tid, arg0, arg1, arg2, arg3);
-  uintptr_t ret = CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
-  return ret;
-}
-
-uintptr_t WRAP_NAME(GlobalAlloc)(WRAP_PARAM4) {
-  uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
-  // Printf("T%d %s(%p %p)=%p\n", tid, __FUNCTION__, arg0, arg1, ret);
-  if (ret != 0) {
-    DumpEvent(ctx, MALLOC, tid, pc, ret, arg1);
-  }
-  return ret;
-}
-
-uintptr_t WRAP_NAME(ZwAllocateVirtualMemory)(WRAP_PARAM6) {
-  // Printf("T%d >>%s(%p %p %p %p %p %p)\n", tid, __FUNCTION__, arg0, arg1, arg2, arg3, arg4, arg5);
-  uintptr_t ret = CallStdCallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5);
-  // Printf("T%d <<%s(%p %p) = %p\n", tid, __FUNCTION__, *(void**)arg1, *(void**)arg3, ret);
-  if (ret == 0) {
-    DumpEvent(ctx, MALLOC, tid, pc, *(uintptr_t*)arg1, *(uintptr_t*)arg3);
-  }
-  return ret;
-}
-
-uintptr_t WRAP_NAME(AllocateHeap)(WRAP_PARAM4) {
-  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-  // Printf("T%d RtlAllocateHeap(%p %p %p)=%p\n", tid, arg0, arg1, arg2, ret);
-  if (ret != 0) {
-    DumpEvent(ctx, MALLOC, tid, pc, ret, arg3);
-  }
-  return ret;
-}
-
-uintptr_t WRAP_NAME(HeapCreate)(WRAP_PARAM4) {
-  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-  Printf("T%d %s(%p %p %p)=%p\n", tid, __FUNCTION__, arg0, arg1, arg2, ret);
-  return ret;
-}
-
-// We don't use the definition of WAIT_OBJECT_0 from winbase.h because
-// it can't be compiled here for some reason.
-#define WAIT_OBJECT_0_ 0
-
-uintptr_t WRAP_NAME(WaitForSingleObjectEx)(WRAP_PARAM4) {
-  if (G_flags->verbosity >= 1) {
-    ShowPcAndSp(__FUNCTION__, tid, pc, 0);
-    Printf("arg0=%lx arg1=%lx\n", arg0, arg1);
-  }
-
-  //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
-  uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
-  //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
-
-  if (ret == WAIT_OBJECT_0_) {
-    bool is_thread_handle = false;
-    {
-      ScopedReentrantClientLock lock(__LINE__);
-      if (g_win_handles_which_are_threads) {
-        is_thread_handle = g_win_handles_which_are_threads->count(arg0) > 0;
-        g_win_handles_which_are_threads->erase(arg0);
-      }
-    }
-    if (is_thread_handle)
-      HandleThreadJoinAfter(tid, arg0);
-    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  }
-
-  return ret;
-}
-
-uintptr_t WRAP_NAME(WaitForMultipleObjectsEx)(WRAP_PARAM6) {
-  if (G_flags->verbosity >= 1) {
-    ShowPcAndSp(__FUNCTION__, tid, pc, 0);
-    Printf("arg0=%lx arg1=%lx arg2=%lx arg3=%lx\n", arg0, arg1, arg2, arg3);
-  }
-
-  //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
-  uintptr_t ret = CallStdCallFun5(ctx, tid, f, arg0, arg1, arg2, arg3, arg4);
-  //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
-
-  if (ret >= WAIT_OBJECT_0_ && ret < WAIT_OBJECT_0_ + arg0) {
-    // TODO(timurrrr): add support for WAIT_ABANDONED_0
-
-    int start_id, count;
-    if (arg2 /* wait_for_all */ == 1) {
-      start_id = 0;
-      count = arg0;
-    } else {
-      start_id = ret - WAIT_OBJECT_0_;
-      count = 1;
-    }
-
-    for (int i = start_id; i < start_id + count; i++) {
-      uintptr_t handle = ((uintptr_t*)arg1)[i];
-      bool is_thread_handle = false;
-      {
-        ScopedReentrantClientLock lock(__LINE__);
-        if (g_win_handles_which_are_threads) {
-          is_thread_handle = g_win_handles_which_are_threads->count(handle) > 0;
-          g_win_handles_which_are_threads->erase(handle);
-        }
-      }
-      if (is_thread_handle)
-        HandleThreadJoinAfter(tid, handle);
-      DumpEvent(ctx, WAIT, tid, pc, handle, 0);
-    }
-  }
-
-  return ret;
-}
-
-#endif  // _MSC_VER
-
-//--------- memory allocation ---------------------- {{{2
-uintptr_t WRAP_NAME(mmap)(WRAP_PARAM6) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_6();
-
-  if (ret != (ADDRINT)-1L) {
-    DumpEvent(ctx, MMAP, tid, pc, ret, arg1);
-  }
-
-  return ret;
-}
-
-uintptr_t WRAP_NAME(munmap)(WRAP_PARAM4) {
-  PinThread &t = g_pin_threads[tid];
-  TLEBFlushLocked(t);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret != (uintptr_t)-1L) {
-    DumpEvent(ctx, MUNMAP, tid, pc, arg0, arg1);
-  }
-  return ret;
-}
-
-
-void After_malloc(FAST_WRAP_PARAM_AFTER) {
-  size_t size = frame.arg[0];
-  if (DEBUG_FAST_INTERCEPTORS)
-    Printf("T%d %s %ld %p\n", tid, __FUNCTION__, size, ret);
-  IgnoreSyncAndMopsEnd(tid);
-  DumpEventWithSp(frame.sp, MALLOC, tid, frame.pc, ret, size);
-}
-
-void Before_malloc(FAST_WRAP_PARAM1) {
-  IgnoreSyncAndMopsBegin(tid);
-  PUSH_AFTER_CALLBACK1(After_malloc, arg0);
-}
-
-void After_free(FAST_WRAP_PARAM_AFTER) {
-  if (DEBUG_FAST_INTERCEPTORS)
-    Printf("T%d %s %p\n", tid, __FUNCTION__, frame.arg[0]);
-  IgnoreSyncAndMopsEnd(tid);
-}
-
-void Before_free(FAST_WRAP_PARAM1) {
-  PinThread &t = g_pin_threads[tid];
-  TLEBFlushLocked(t);
-  DumpEvent(0, FREE, tid, pc, arg0, 0);
-  IgnoreSyncAndMopsBegin(tid);
-  PUSH_AFTER_CALLBACK1(After_free, arg0);
-}
-
-void Before_calloc(FAST_WRAP_PARAM2) {
-  IgnoreSyncAndMopsBegin(tid);
-  PUSH_AFTER_CALLBACK1(After_malloc, arg0 * arg1);
-}
-
-void Before_realloc(FAST_WRAP_PARAM2) {
-  PinThread &t = g_pin_threads[tid];
-  TLEBFlushLocked(t);
-  IgnoreSyncAndMopsBegin(tid);
-  // TODO: handle FREE? We don't do it in Valgrind right now.
-  PUSH_AFTER_CALLBACK1(After_malloc, arg1);
-}
-
-// Fast path for INS_InsertIfCall.
-ADDRINT Before_RET_IF(THREADID tid, ADDRINT pc, ADDRINT sp, ADDRINT ret) {
-  PinThread &t = g_pin_threads[tid];
-  return t.ic_stack.size();
-}
-
-void Before_RET_THEN(THREADID tid, ADDRINT pc, ADDRINT sp, ADDRINT ret) {
-  PinThread &t = g_pin_threads[tid];
-  if (t.ic_stack.size() == 0) return;
-  DCHECK(t.ic_stack.size());
-  InstrumentedCallFrame *frame = t.ic_stack.Top();
-  if (DEBUG_FAST_INTERCEPTORS) {
-    Printf("T%d RET  pc=%p sp=%p *sp=%p frame.sp=%p stack_size %ld\n",
-           tid, pc, sp, *(uintptr_t*)sp, frame->sp, t.ic_stack.size());
-    t.ic_stack.Print();
-  }
-  while (frame->sp <= sp) {
-    if (DEBUG_FAST_INTERCEPTORS)
-      Printf("pop\n");
-    frame->callback(tid, *frame, ret);
-    t.ic_stack.Pop();
-    if (t.ic_stack.size()) {
-      frame = t.ic_stack.Top();
-    } else {
-      break;
-    }
-  }
-}
-
-// These are no longer used in favor of "fast" wrappers (e.g. Before_malloc)
-// TODO(timurrrr): Check on the buildbot and remove.
-uintptr_t WRAP_NAME(malloc)(WRAP_PARAM4) { CHECK(0); }
-uintptr_t WRAP_NAME(realloc)(WRAP_PARAM4) { CHECK(0); }
-uintptr_t WRAP_NAME(calloc)(WRAP_PARAM4) { CHECK(0); }
-uintptr_t WRAP_NAME(free)(WRAP_PARAM4) { CHECK(0); }
-
-
-//-------- Routines and stack ---------------------- {{{2
-static INLINE void UpdateCallStack(PinThread &t, ADDRINT sp) {
-  while (t.shadow_stack.size() > 0 && sp >= t.shadow_stack.back().sp) {
-    TLEBAddRtnExit(t);
-    size_t size = t.shadow_stack.size();
-    CHECK(size < 1000000);  // stay sane.
-    uintptr_t popped_pc = t.shadow_stack.back().pc;
-#ifdef _MSC_VER
-    // h-b edge from here to UnregisterWaitEx.
-    CHECK(g_windows_thread_pool_calback_set);
-    if (g_windows_thread_pool_calback_set->count(popped_pc)) {
-      DumpEvent(0, SIGNAL, t.tid, 0, popped_pc, 0);
-      // Printf("T%d ret %p\n", t.tid, popped_pc);
-    }
-#endif
-
-    if (debug_rtn) {
-      ShowPcAndSp("RET : ", t.tid, popped_pc, sp);
-    }
-    t.shadow_stack.pop_back();
-    CHECK(size - 1 == t.shadow_stack.size());
-    if (DEB_PR) {
-      Printf("POP SHADOW STACK\n");
-      PrintShadowStack(t);
-    }
-  }
-}
-
-void InsertBeforeEvent_SysCall(THREADID tid, ADDRINT sp) {
-  PinThread &t = g_pin_threads[tid];
-  UpdateCallStack(t, sp);
-  TLEBFlushLocked(t);
-}
-
-void InsertBeforeEvent_Call(THREADID tid, ADDRINT pc, ADDRINT target,
-                            ADDRINT sp, IGNORE_BELOW_RTN ignore_below) {
-  PinThread &t = g_pin_threads[tid];
-  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, pc, sp);
-  UpdateCallStack(t, sp);
-  TLEBAddRtnCall(t, pc, target, ignore_below);
-  t.shadow_stack.push_back(StackFrame(target, sp));
-  if (DEB_PR) {
-    PrintShadowStack(t);
-  }
-  if (DEBUG_MODE && debug_rtn) {
-    ShowPcAndSp("CALL: ", t.tid, target, sp);
-  }
-
-#ifdef _MSC_VER
-  // h-b edge from RtlQueueWorkItem to here.
-  CHECK(g_windows_thread_pool_calback_set);
-  if (g_windows_thread_pool_calback_set->count(target)) {
-    DumpEvent(0, WAIT, tid, pc, target, 0);
-  }
-#endif
-}
-
-static void OnTraceSerial(THREADID tid, ADDRINT sp, TraceInfo *trace_info,
-    uintptr_t **tls_reg_p) {
-  PinThread &t = g_pin_threads[tid];
-
-  DCHECK(trace_info);
-  DCHECK(trace_info->n_mops() > 0);
-  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, trace_info->pc(), sp);
-
-  UpdateCallStack(t, sp);
-
-  t.trace_info = trace_info;
-  trace_info->counter()++;
-  *tls_reg_p = TLEBAddTrace(t);
-}
-
-static void OnTraceParallel(uintptr_t *tls_reg, ADDRINT sp, TraceInfo *trace_info) {
-  // Get the thread handler directly from tls_reg.
-  PinThread &t = *(PinThread*)(tls_reg - 4);
-  t.trace_info = trace_info;
-  if (t.ignore_accesses) return;
-
-  DCHECK(trace_info);
-  DCHECK(trace_info->n_mops() > 0);
-  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, trace_info->pc(), sp);
-
-  UpdateCallStack(t, sp);
-
-
-  if (DEBUG_MODE && G_flags->show_stats)  // this stat may be racey; avoid ping-pong.
-    trace_info->counter()++;
-  TLEBAddTrace(t);
-}
-
-/* Verify all mop accesses in the last trace of the given thread by registering
-   them with RaceVerifier and sleeping a bit. */
-static void OnTraceVerifyInternal(PinThread &t, uintptr_t **tls_reg_p) {
-  DCHECK(g_race_verifier_active);
-  if (t.trace_info) {
-    int need_sleep = 0;
-    for (unsigned i = 0; i < t.trace_info->n_mops(); ++i) {
-      uintptr_t addr = (*tls_reg_p)[i];
-      if (addr) {
-        MopInfo *mop = t.trace_info->GetMop(i);
-        need_sleep += RaceVerifierStartAccess(t.uniq_tid, addr, mop->pc(),
-            mop->is_write());
-      }
-    }
-
-    if (!need_sleep)
-      return;
-
-    usleep(G_flags->race_verifier_sleep_ms * 1000);
-
-    for (unsigned i = 0; i < t.trace_info->n_mops(); ++i) {
-      uintptr_t addr = (*tls_reg_p)[i];
-      if (addr) {
-        MopInfo *mop = t.trace_info->GetMop(i);
-        RaceVerifierEndAccess(t.uniq_tid, addr, mop->pc(), mop->is_write());
-      }
-    }
-  }
-}
-
-static void OnTraceNoMopsVerify(THREADID tid, ADDRINT sp,
-    uintptr_t **tls_reg_p) {
-  PinThread &t = g_pin_threads[tid];
-  DCHECK(g_race_verifier_active);
-  OnTraceVerifyInternal(t, tls_reg_p);
-  t.trace_info = NULL;
-}
-
-static void OnTraceVerify(THREADID tid, ADDRINT sp, TraceInfo *trace_info,
-    uintptr_t **tls_reg_p) {
-  DCHECK(g_race_verifier_active);
-  PinThread &t = g_pin_threads[tid];
-  OnTraceVerifyInternal(t, tls_reg_p);
-
-  DCHECK(trace_info->n_mops() > 0);
-
-  t.trace_info = trace_info;
-  trace_info->counter()++;
-  *tls_reg_p = TLEBAddTrace(t);
-}
-
-
-//---------- Memory accesses -------------------------- {{{2
-// 'addr' is the section of t.tleb.events which is set in OnTrace.
-// 'idx' is the number of this mop in its trace.
-// 'a' is the actuall address.
-// 'tid' is thread ID, used only in debug mode.
-//
-// In opt mode this is just one instruction! Something like this:
-// mov %rcx,(%rdi,%rdx,8)
-static void OnMop(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
-  if (DEBUG_MODE) {
-    PinThread &t= g_pin_threads[tid];
-    CHECK(idx < kMaxMopsPerTrace);
-    CHECK(idx < t.trace_info->n_mops());
-    uintptr_t *ptr = addr + idx;
-    CHECK(ptr >= t.tleb.events);
-    CHECK(ptr < t.tleb.events + kThreadLocalEventBufferSize);
-    if (a == G_flags->trace_addr) {
-      Printf("T%d %s %lx\n", t.tid, __FUNCTION__, a);
-    }
-  }
-  addr[idx] = a;
-}
-
-static void On_PredicatedMop(BOOL is_running, uintptr_t *addr,
-                             THREADID tid, ADDRINT idx, ADDRINT a) {
-  if (is_running) {
-    OnMop(addr, tid, idx, a);
-  }
-}
-
-static void OnMopCheckIdentStoreBefore(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
-  // Write the value of *a to tleb.
-  addr[idx] = *(uintptr_t*)a;
-}
-static void OnMopCheckIdentStoreAfter(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
-  // Check if the previous value of *a is equal to the new one.
-  // If not, we have a regular memory access. If yes, we have an ident operation,
-  // which we want to ignore.
-  uintptr_t previous_value_of_a = addr[idx];
-  uintptr_t new_value_of_a = *(uintptr_t*)a;
-  // 111...111 if the values are different, 0 otherwise.
-  uintptr_t ne_mask = -(uintptr_t)(new_value_of_a != previous_value_of_a);
-  addr[idx] = ne_mask & a;
-}
-
-//---------- I/O; exit------------------------------- {{{2
-static const uintptr_t kIOMagic = 0x1234c678;
-
-static void Before_SignallingIOCall(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, SIGNAL, tid, pc, kIOMagic, 0);
-}
-
-static void After_WaitingIOCall(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, WAIT, tid, pc, kIOMagic, 0);
-}
-
-static const uintptr_t kAtexitMagic = 0x9876f432;
-
-static void On_atexit(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, SIGNAL, tid, pc, kAtexitMagic, 0);
-}
-
-static void On_exit(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, WAIT, tid, pc, kAtexitMagic, 0);
-}
-
-//---------- Synchronization -------------------------- {{{2
-// locks
-static void Before_pthread_unlock(THREADID tid, ADDRINT pc, ADDRINT mu) {
-  DumpEvent(0, UNLOCK, tid, pc, mu, 0);
-}
-
-static void After_pthread_mutex_lock(FAST_WRAP_PARAM_AFTER) {
-  DumpEventWithSp(frame.sp, WRITER_LOCK, tid, frame.pc, frame.arg[0], 0);
-}
-
-static void Before_pthread_mutex_lock(FAST_WRAP_PARAM1) {
-  PUSH_AFTER_CALLBACK1(After_pthread_mutex_lock, arg0);
-}
-
-// In some versions of libpthread, pthread_spin_lock is effectively
-// a recursive function. It jumps to its first insn:
-//    beb0:       f0 ff 0f                lock decl (%rdi)
-//    beb3:       75 0b                   jne    bec0 <pthread_spin_lock+0x10>
-//    beb5:       31 c0                   xor    %eax,%eax
-//    beb7:       c3                      retq
-//    beb8:       0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
-//    bebf:       00
-//    bec0:       f3 90                   pause
-//    bec2:       83 3f 00                cmpl   $0x0,(%rdi)
-//    bec5:       7f e9  >>>>>>>>>>>>>    jg     beb0 <pthread_spin_lock> 
-//    bec7:       eb f7                   jmp    bec0 <pthread_spin_lock+0x10>
-//
-// So, we need to act only when we return from the last (depth=0) invocation.
-static uintptr_t WRAP_NAME(pthread_spin_lock)(WRAP_PARAM4) {
-  PinThread &t= g_pin_threads[tid];
-  t.spin_lock_recursion_depth++;
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  t.spin_lock_recursion_depth--;
-  if (t.spin_lock_recursion_depth == 0) {
-    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  }
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_rwlock_wrlock)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_rwlock_rdlock)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_mutex_trylock)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret == 0)
-    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_spin_trylock)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret == 0)
-    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_spin_init)(WRAP_PARAM4) {
-  DumpEvent(ctx, UNLOCK_OR_INIT, tid, pc, arg0, 0);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  return ret;
-}
-static uintptr_t WRAP_NAME(pthread_spin_destroy)(WRAP_PARAM4) {
-  DumpEvent(ctx, LOCK_DESTROY, tid, pc, arg0, 0);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  return ret;
-}
-static uintptr_t WRAP_NAME(pthread_spin_unlock)(WRAP_PARAM4) {
-  DumpEvent(ctx, UNLOCK_OR_INIT, tid, pc, arg0, 0);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_rwlock_trywrlock)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret == 0)
-    DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-
-static uintptr_t WRAP_NAME(pthread_rwlock_tryrdlock)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret == 0)
-    DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
-  return ret;
-}
-
-
-static void Before_pthread_mutex_init(THREADID tid, ADDRINT pc, ADDRINT mu) {
-  DumpEvent(0, LOCK_CREATE, tid, pc, mu, 0);
-}
-static void Before_pthread_rwlock_init(THREADID tid, ADDRINT pc, ADDRINT mu) {
-  DumpEvent(0, LOCK_CREATE, tid, pc, mu, 0);
-}
-
-static void Before_pthread_mutex_destroy(THREADID tid, ADDRINT pc, ADDRINT mu) {
-  DumpEvent(0, LOCK_DESTROY, tid, pc, mu, 0);
-}
-static void Before_pthread_rwlock_destroy(THREADID tid, ADDRINT pc, ADDRINT mu) {
-  DumpEvent(0, LOCK_DESTROY, tid, pc, mu, 0);
-}
-
-// barrier
-static uintptr_t WRAP_NAME(pthread_barrier_init)(WRAP_PARAM4) {
-  DumpEvent(ctx, CYCLIC_BARRIER_INIT, tid, pc, arg0, arg2);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  return ret;
-}
-static uintptr_t WRAP_NAME(pthread_barrier_wait)(WRAP_PARAM4) {
-  DumpEvent(ctx, CYCLIC_BARRIER_WAIT_BEFORE, tid, pc, arg0, 0);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  DumpEvent(ctx, CYCLIC_BARRIER_WAIT_AFTER, tid, pc, arg0, 0);
-  return ret;
-}
-
-
-// condvar
-static void Before_pthread_cond_signal(THREADID tid, ADDRINT pc, ADDRINT cv) {
-  DumpEvent(0, SIGNAL, tid, pc, cv, 0);
-}
-
-static uintptr_t WRAP_NAME(pthread_cond_wait)(WRAP_PARAM4) {
-  DumpEvent(ctx, UNLOCK, tid, pc, arg1, 0);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg1, 0);
-  return ret;
-}
-static uintptr_t WRAP_NAME(pthread_cond_timedwait)(WRAP_PARAM4) {
-  DumpEvent(ctx, UNLOCK, tid, pc, arg1, 0);
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret == 0) {
-    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  }
-  DumpEvent(ctx, WRITER_LOCK, tid, pc, arg1, 0);
-  return ret;
-}
-
-// epoll
-static const uintptr_t kSocketMagic = 0xDEADFBAD;
-
-static void Before_epoll_ctl(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, SIGNAL, tid, pc, kSocketMagic, 0);
-}
-
-static void After_epoll_wait(THREADID tid, ADDRINT pc) {
-  DumpEvent(0, WAIT, tid, pc, kSocketMagic, 0);
-}
-
-// sem
-static void After_sem_open(THREADID tid, ADDRINT pc, ADDRINT ret) {
-  // TODO(kcc): need to handle it more precise?
-  DumpEvent(0, SIGNAL, tid, pc, ret, 0);
-}
-static void Before_sem_post(THREADID tid, ADDRINT pc, ADDRINT sem) {
-  DumpEvent(0, SIGNAL, tid, pc, sem, 0);
-}
-
-static uintptr_t WRAP_NAME(sem_wait)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  return ret;
-}
-static uintptr_t WRAP_NAME(sem_trywait)(WRAP_PARAM4) {
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-  if (ret == 0) {
-    DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
-  }
-  return ret;
-}
-
-// etc
-#if defined(__GNUC__)
-uintptr_t WRAP_NAME(lockf)(WRAP_PARAM4) {
-  if (arg1 == F_ULOCK) {
-    DumpEvent(0, SIGNAL, tid, pc, kSocketMagic, 0);
-  }
-
-  uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
-
-  if (arg1 == F_LOCK && ret == 0) {
-    DumpEvent(0, WAIT, tid, pc, kSocketMagic, 0);
-  }
-
-  return ret;
-}
-#endif
-
-//--------- Annotations -------------------------- {{{2
-static void On_AnnotateBenignRace(THREADID tid, ADDRINT pc,
-                                  ADDRINT file, ADDRINT line,
-                                  ADDRINT a, ADDRINT descr) {
-  DumpEvent(0, BENIGN_RACE, tid, descr, a, 1);
-}
-
-static void On_AnnotateBenignRaceSized(THREADID tid, ADDRINT pc,
-                                       ADDRINT file, ADDRINT line,
-                                       ADDRINT a, ADDRINT size, ADDRINT descr) {
-  DumpEvent(0, BENIGN_RACE, tid, descr, a, size);
-}
-
-static void On_AnnotateExpectRace(THREADID tid, ADDRINT pc,
-                                  ADDRINT file, ADDRINT line,
-                                  ADDRINT a, ADDRINT descr) {
-  DumpEvent(0, EXPECT_RACE, tid, descr, a, 0);
-}
-
-static void On_AnnotateFlushExpectedRaces(THREADID tid, ADDRINT pc,
-                                  ADDRINT file, ADDRINT line) {
-  DumpEvent(0, FLUSH_EXPECTED_RACES, 0, 0, 0, 0);
-}
-
-
-static void On_AnnotateTraceMemory(THREADID tid, ADDRINT pc,
-                                   ADDRINT file, ADDRINT line,
-                                   ADDRINT a) {
-  DumpEvent(0, TRACE_MEM, tid, pc, a, 0);
-}
-
-static void On_AnnotateNewMemory(THREADID tid, ADDRINT pc,
-                                   ADDRINT file, ADDRINT line,
-                                   ADDRINT a, ADDRINT size) {
-  DumpEvent(0, MALLOC, tid, pc, a, size);
-}
-
-static void On_AnnotateNoOp(THREADID tid, ADDRINT pc,
-                            ADDRINT file, ADDRINT line, ADDRINT a) {
-  Printf("%s T%d: %s:%d %p\n", __FUNCTION__, tid, (char*)file, (int)line, a);
-  //DumpEvent(0, STACK_TRACE, tid, pc, 0, 0);
-//  PrintShadowStack(tid);
-}
-
-static void On_AnnotateFlushState(THREADID tid, ADDRINT pc,
-                                  ADDRINT file, ADDRINT line) {
-  DumpEvent(0, FLUSH_STATE, tid, pc, 0, 0);
-}
-
-static void On_AnnotateCondVarSignal(THREADID tid, ADDRINT pc,
-                                     ADDRINT file, ADDRINT line, ADDRINT obj) {
-  DumpEvent(0, SIGNAL, tid, pc, obj, 0);
-}
-
-static void On_AnnotateCondVarWait(THREADID tid, ADDRINT pc,
-                                   ADDRINT file, ADDRINT line, ADDRINT obj) {
-  DumpEvent(0, WAIT, tid, pc, obj, 0);
-}
-
-static void On_AnnotateHappensBefore(THREADID tid, ADDRINT pc,
-                                     ADDRINT file, ADDRINT line, ADDRINT obj) {
-  DumpEvent(0, SIGNAL, tid, pc, obj, 0);
-}
-
-static void On_AnnotateHappensAfter(THREADID tid, ADDRINT pc,
-                                    ADDRINT file, ADDRINT line, ADDRINT obj) {
-  DumpEvent(0, WAIT, tid, pc, obj, 0);
-}
-
-static void On_AnnotateEnableRaceDetection(THREADID tid, ADDRINT pc,
-                                        ADDRINT file, ADDRINT line,
-                                        ADDRINT enable) {
-  if (!g_race_verifier_active)
-    TLEBSimpleEvent(g_pin_threads[tid],
-        enable ? TLEB_GLOBAL_IGNORE_OFF : TLEB_GLOBAL_IGNORE_ON);
-}
-
-static void On_AnnotateIgnoreReadsBegin(THREADID tid, ADDRINT pc,
-                                        ADDRINT file, ADDRINT line) {
-  DumpEvent(0, IGNORE_READS_BEG, tid, pc, 0, 0);
-}
-static void On_AnnotateIgnoreReadsEnd(THREADID tid, ADDRINT pc,
-                                      ADDRINT file, ADDRINT line) {
-  DumpEvent(0, IGNORE_READS_END, tid, pc, 0, 0);
-}
-static void On_AnnotateIgnoreWritesBegin(THREADID tid, ADDRINT pc,
-                                         ADDRINT file, ADDRINT line) {
-  DumpEvent(0, IGNORE_WRITES_BEG, tid, pc, 0, 0);
-}
-static void On_AnnotateIgnoreWritesEnd(THREADID tid, ADDRINT pc,
-                                       ADDRINT file, ADDRINT line) {
-  DumpEvent(0, IGNORE_WRITES_END, tid, pc, 0, 0);
-}
-static void On_AnnotateThreadName(THREADID tid, ADDRINT pc,
-                                  ADDRINT file, ADDRINT line,
-                                  ADDRINT name) {
-  DumpEvent(0, SET_THREAD_NAME, tid, pc, name, 0);
-}
-static void On_AnnotatePublishMemoryRange(THREADID tid, ADDRINT pc,
-                                          ADDRINT file, ADDRINT line,
-                                          ADDRINT a, ADDRINT size) {
-  DumpEvent(0, PUBLISH_RANGE, tid, pc, a, size);
-}
-
-static void On_AnnotateUnpublishMemoryRange(THREADID tid, ADDRINT pc,
-                                          ADDRINT file, ADDRINT line,
-                                          ADDRINT a, ADDRINT size) {
-//  Printf("T%d %s %lx %lx\n", tid, __FUNCTION__, a, size);
-  DumpEvent(0, UNPUBLISH_RANGE, tid, pc, a, size);
-}
-
-
-static void On_AnnotateMutexIsUsedAsCondVar(THREADID tid, ADDRINT pc,
-                                            ADDRINT file, ADDRINT line,
-                                            ADDRINT mu) {
-  DumpEvent(0, HB_LOCK, tid, pc, mu, 0);
-}
-
-static void On_AnnotateMutexIsNotPhb(THREADID tid, ADDRINT pc,
-                                     ADDRINT file, ADDRINT line,
-                                     ADDRINT mu) {
-  DumpEvent(0, NON_HB_LOCK, tid, pc, mu, 0);
-}
-
-static void On_AnnotatePCQCreate(THREADID tid, ADDRINT pc,
-                                 ADDRINT file, ADDRINT line,
-                                 ADDRINT pcq) {
-  DumpEvent(0, PCQ_CREATE, tid, pc, pcq, 0);
-}
-
-static void On_AnnotatePCQDestroy(THREADID tid, ADDRINT pc,
-                                  ADDRINT file, ADDRINT line,
-                                  ADDRINT pcq) {
-  DumpEvent(0, PCQ_DESTROY, tid, pc, pcq, 0);
-}
-
-static void On_AnnotatePCQPut(THREADID tid, ADDRINT pc,
-                              ADDRINT file, ADDRINT line,
-                              ADDRINT pcq) {
-  DumpEvent(0, PCQ_PUT, tid, pc, pcq, 0);
-}
-
-static void On_AnnotatePCQGet(THREADID tid, ADDRINT pc,
-                              ADDRINT file, ADDRINT line,
-                              ADDRINT pcq) {
-  DumpEvent(0, PCQ_GET, tid, pc, pcq, 0);
-}
-
-static void On_AnnotateRWLockCreate(THREADID tid, ADDRINT pc,
-                                    ADDRINT file, ADDRINT line,
-                                    ADDRINT lock) {
-  DumpEvent(0, LOCK_CREATE, tid, pc, lock, 0);
-}
-
-static void On_AnnotateRWLockDestroy(THREADID tid, ADDRINT pc,
-                                    ADDRINT file, ADDRINT line,
-                                    ADDRINT lock) {
-  DumpEvent(0, LOCK_DESTROY, tid, pc, lock, 0);
-}
-
-static void On_AnnotateRWLockAcquired(THREADID tid, ADDRINT pc,
-                                     ADDRINT file, ADDRINT line,
-                                     ADDRINT lock, ADDRINT is_w) {
-  DumpEvent(0, is_w ? WRITER_LOCK : READER_LOCK, tid, pc, lock, 0);
-}
-
-static void On_AnnotateRWLockReleased(THREADID tid, ADDRINT pc,
-                                     ADDRINT file, ADDRINT line,
-                                     ADDRINT lock, ADDRINT is_w) {
-  DumpEvent(0, UNLOCK, tid, pc, lock, 0);
-}
-
-
-int WRAP_NAME(RunningOnValgrind)(WRAP_PARAM4) {
-  return 1;
-}
-
-//--------- Instrumentation ----------------------- {{{1
-static bool IgnoreImage(IMG img) {
-  string name = IMG_Name(img);
-  if (name.find("/ld-") != string::npos)
-    return true;
-  return false;
-}
-
-static bool IgnoreRtn(RTN rtn) {
-  CHECK(rtn != RTN_Invalid());
-  ADDRINT rtn_address = RTN_Address(rtn);
-  if (ThreadSanitizerWantToInstrumentSblock(rtn_address) == false)
-    return true;
-  return false;
-}
-
-static bool InstrumentCall(INS ins) {
-  // Call.
-  if (INS_IsProcedureCall(ins) && !INS_IsSyscall(ins)) {
-    IGNORE_BELOW_RTN ignore_below = IGNORE_BELOW_RTN_UNKNOWN;
-    if (INS_IsDirectBranchOrCall(ins)) {
-      ADDRINT target = INS_DirectBranchOrCallTargetAddress(ins);
-      bool ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target);
-      ignore_below = ignore ? IGNORE_BELOW_RTN_YES : IGNORE_BELOW_RTN_NO;
-    }
-    INS_InsertCall(ins, IPOINT_BEFORE,
-                   (AFUNPTR)InsertBeforeEvent_Call,
-                   IARG_THREAD_ID,
-                   IARG_INST_PTR,
-                   IARG_BRANCH_TARGET_ADDR,
-                   IARG_REG_VALUE, REG_STACK_PTR,
-                   IARG_ADDRINT, ignore_below,
-                   IARG_END);
-    return true;
-  }
-  if (INS_IsSyscall(ins)) {
-    INS_InsertCall(ins, IPOINT_BEFORE,
-                   (AFUNPTR)InsertBeforeEvent_SysCall,
-                   IARG_THREAD_ID,
-                   IARG_REG_VALUE, REG_STACK_PTR,
-                   IARG_END);
-  }
-  return false;
-}
-
-
-// return the number of inserted instrumentations.
-static void InstrumentMopsInBBl(BBL bbl, RTN rtn, TraceInfo *trace_info, uintptr_t instrument_pc, size_t *mop_idx) {
-  // compute 'dtor_head', see
-  // http://code.google.com/p/data-race-test/wiki/PopularDataRaces#Data_race_on_vptr
-  // On x86_64 only the first BB of DTOR is treated as dtor_head.
-  // On x86, we have to treat more BBs as dtor_head due to -fPIC.
-  // See http://code.google.com/p/chromium/issues/detail?id=61199
-  bool dtor_head = false;
-#ifdef TARGET_IA32
-  size_t max_offset_for_dtor_head = 32;
-#else
-  size_t max_offset_for_dtor_head = 0;
-#endif
-
-  if (BBL_Address(bbl) - RTN_Address(rtn) <= max_offset_for_dtor_head) {
-    string demangled_rtn_name = Demangle(RTN_Name(rtn).c_str());
-    if (demangled_rtn_name.find("::~") != string::npos)
-      dtor_head = true;
-  }
-
-  INS tail = BBL_InsTail(bbl);
-  // All memory reads/writes
-  for( INS ins = BBL_InsHead(bbl);
-       INS_Valid(ins);
-       ins = INS_Next(ins) ) {
-    if (ins != tail) {
-      CHECK(!INS_IsRet(ins));
-      CHECK(!INS_IsProcedureCall(ins));
-    }
-    // bool is_stack = INS_IsStackRead(ins) || INS_IsStackWrite(ins);
-    if (INS_IsAtomicUpdate(ins)) continue;
-
-    int n_mops = INS_MemoryOperandCount(ins);
-    if (n_mops == 0) continue;
-
-    string opcode_str = OPCODE_StringShort(INS_Opcode(ins));
-    if (trace_info && debug_ins) {
-      Printf("  INS: opcode=%s n_mops=%d dis=\"%s\"\n",
-             opcode_str.c_str(),  n_mops,
-             INS_Disassemble(ins).c_str());
-    }
-
-    bool ins_ignore_writes = false;
-    bool ins_ignore_reads = false;
-
-    // CALL writes to stack and (if the call is indirect) reads the target
-    // address. We don't want to handle the stack write.
-    if (INS_IsCall(ins)) {
-      CHECK(n_mops == 1 || n_mops == 2);
-      ins_ignore_writes = true;
-    }
-
-    // PUSH: we ignore the write to stack but we don't ignore the read (if any).
-    if (opcode_str == "PUSH") {
-      CHECK(n_mops == 1 || n_mops == 2);
-      ins_ignore_writes = true;
-    }
-
-    // POP: we are reading from stack, Ignore it.
-    if (opcode_str == "POP") {
-      CHECK(n_mops == 1 || n_mops == 2);
-      ins_ignore_reads = true;
-      continue;
-    }
-
-    // RET/LEAVE -- ignore it, it just reads the return address and stack.
-    if (INS_IsRet(ins) || opcode_str == "LEAVE") {
-      CHECK(n_mops == 1);
-      continue;
-    }
-
-    bool is_predicated = INS_IsPredicated(ins);
-    for (int i = 0; i < n_mops; i++) {
-      if (*mop_idx >= kMaxMopsPerTrace) {
-        Report("INFO: too many mops in trace: %d %s\n",
-            INS_Address(ins), PcToRtnName(INS_Address(ins), true).c_str());
-        return;
-      }
-      size_t size = INS_MemoryOperandSize(ins, i);
-      CHECK(size);
-      bool is_write = INS_MemoryOperandIsWritten(ins, i);
-
-      if (ins_ignore_writes && is_write) continue;
-      if (ins_ignore_reads && !is_write) continue;
-      if (instrument_pc && instrument_pc != INS_Address(ins)) continue;
-
-      bool check_ident_store = false;
-      if (dtor_head && is_write && INS_IsMov(ins) && size == sizeof(void*)) {
-        // This is a special case for '*addr = value', where we want to ignore the
-        // access if *addr == value before the store.
-        CHECK(!is_predicated);
-        check_ident_store = true;
-      }
-
-      if (trace_info) {
-        if (debug_ins) {
-          Printf("    size=%ld is_w=%d\n", size, (int)is_write);
-        }
-        IPOINT point = IPOINT_BEFORE;
-        AFUNPTR on_mop_callback = (AFUNPTR)OnMop;
-        if (check_ident_store) {
-          INS_InsertCall(ins, IPOINT_BEFORE,
-            (AFUNPTR)OnMopCheckIdentStoreBefore,
-            IARG_REG_VALUE, tls_reg,
-            IARG_THREAD_ID,
-            IARG_ADDRINT, *mop_idx,
-            IARG_MEMORYOP_EA, i,
-            IARG_END);
-          // This is just a MOV, so we can insert the instrumentation code
-          // after the insn.
-          point = IPOINT_AFTER;
-          on_mop_callback = (AFUNPTR)OnMopCheckIdentStoreAfter;
-        }
-
-        MopInfo *mop = trace_info->GetMop(*mop_idx);
-        new (mop) MopInfo(INS_Address(ins), size, is_write, false);
-        if (is_predicated) {
-          INS_InsertPredicatedCall(ins, point,
-              (AFUNPTR)On_PredicatedMop,
-              IARG_EXECUTING,
-              IARG_REG_VALUE, tls_reg,
-              IARG_THREAD_ID,
-              IARG_ADDRINT, *mop_idx,
-              IARG_MEMORYOP_EA, i,
-              IARG_END);
-        } else {
-          INS_InsertCall(ins, point,
-              on_mop_callback,
-              IARG_REG_VALUE, tls_reg,
-              IARG_THREAD_ID,
-              IARG_ADDRINT, *mop_idx,
-              IARG_MEMORYOP_EA, i,
-              IARG_END);
-        }
-      }
-      (*mop_idx)++;
-    }
-  }
-}
-
-void CallbackForTRACE(TRACE trace, void *v) {
-  CHECK(n_started_threads > 0);
-
-  RTN rtn = TRACE_Rtn(trace);
-  bool ignore_memory = false;
-  string img_name = "<>";
-  string rtn_name = "<>";
-  if (RTN_Valid(rtn)) {
-    SEC sec = RTN_Sec(rtn);
-    IMG img = SEC_Img(sec);
-    rtn_name = RTN_Name(rtn);
-    img_name = IMG_Name(img);
-
-    if (IgnoreImage(img)) {
-      // Printf("Ignoring memory accesses in %s\n", IMG_Name(img).c_str());
-      ignore_memory = true;
-    } else if (IgnoreRtn(rtn)) {
-      ignore_memory = true;
-    }
-  }
-
-  uintptr_t instrument_pc = 0;
-  if (g_race_verifier_active) {
-    // Check if this trace looks like part of a possible race report.
-    uintptr_t min_pc = UINTPTR_MAX;
-    uintptr_t max_pc = 0;
-    for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
-      min_pc = MIN(min_pc, INS_Address(BBL_InsHead(bbl)));
-      max_pc = MAX(max_pc, INS_Address(BBL_InsTail(bbl)));
-    }
-
-    bool verify_trace = RaceVerifierGetAddresses(min_pc, max_pc, &instrument_pc);
-    if (!verify_trace)
-      ignore_memory = true;
-  }
-
-  size_t n_mops = 0;
-  // count the mops.
-  for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
-    if (!ignore_memory) {
-      InstrumentMopsInBBl(bbl, rtn, NULL, instrument_pc, &n_mops);
-    }
-    INS tail = BBL_InsTail(bbl);
-    if (INS_IsRet(tail)) {
-#if 0
-      INS_InsertIfCall(tail, IPOINT_BEFORE,
-                       (AFUNPTR)Before_RET_IF,
-                       IARG_THREAD_ID,
-                       IARG_END);
-
-      INS_InsertThenCall(
-#else
-        INS_InsertCall(
-#endif
-          tail, IPOINT_BEFORE,
-          (AFUNPTR)Before_RET_THEN,
-          IARG_THREAD_ID,
-          IARG_INST_PTR,
-          IARG_REG_VALUE, REG_STACK_PTR,
-          IARG_FUNCRET_EXITPOINT_VALUE,
-          IARG_END);
-    }
-  }
-
-  // Handle the head of the trace
-  INS head = BBL_InsHead(TRACE_BblHead(trace));
-  CHECK(n_mops <= kMaxMopsPerTrace);
-
-  TraceInfo *trace_info = NULL;
-  if (n_mops) {
-    trace_info = TraceInfo::NewTraceInfo(n_mops, INS_Address(head));
-    if (TS_SERIALIZED == 0) {
-      // TODO(kcc): implement race verifier here.
-      INS_InsertCall(head, IPOINT_BEFORE,
-                     (AFUNPTR)OnTraceParallel,
-                     IARG_REG_VALUE, tls_reg,
-                     IARG_REG_VALUE, REG_STACK_PTR,
-                     IARG_PTR, trace_info,
-                     IARG_END);
-    } else {
-      AFUNPTR handler = (AFUNPTR)(g_race_verifier_active ?
-                                  OnTraceVerify : OnTraceSerial);
-      INS_InsertCall(head, IPOINT_BEFORE,
-                     handler,
-                     IARG_THREAD_ID,
-                     IARG_REG_VALUE, REG_STACK_PTR,
-                     IARG_PTR, trace_info,
-                     IARG_REG_REFERENCE, tls_reg,
-                     IARG_END);
-    }
-  } else {
-    if (g_race_verifier_active) {
-      INS_InsertCall(head, IPOINT_BEFORE,
-                     (AFUNPTR)OnTraceNoMopsVerify,
-                     IARG_THREAD_ID,
-                     IARG_REG_VALUE, REG_STACK_PTR,
-                     IARG_REG_REFERENCE, tls_reg,
-                     IARG_END);
-    }
-  }
-
-  // instrument the mops. We want to do it after we instrumented the head
-  // to maintain the right order of instrumentation callbacks (head first, then
-  // mops).
-  size_t i = 0;
-  if (n_mops) {
-    if (debug_ins) {
-      Printf("TRACE %p (%p); n_mops=%ld %s\n", trace_info,
-             TRACE_Address(trace),
-             trace_info->n_mops(),
-             PcToRtnName(trace_info->pc(), false).c_str());
-    }
-    for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
-      InstrumentMopsInBBl(bbl, rtn, trace_info, instrument_pc, &i);
-    }
-  }
-
-  // instrument the calls, do it after all other instrumentation.
-  if (!g_race_verifier_active) {
-    for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
-      InstrumentCall(BBL_InsTail(bbl));
-    }
-  }
-
-  CHECK(n_mops == i);
-}
-
-
-#define INSERT_FN_HELPER(point, name, rtn, to_insert, ...) \
-    RTN_Open(rtn); \
-    if (G_flags->verbosity >= 2) Printf("RTN: Inserting %-50s (%s) %s (%s) img: %s\n", \
-    #to_insert, #point, RTN_Name(rtn).c_str(), name, IMG_Name(img).c_str());\
-    RTN_InsertCall(rtn, point, (AFUNPTR)to_insert, IARG_THREAD_ID, \
-                   IARG_INST_PTR, __VA_ARGS__, IARG_END);\
-    RTN_Close(rtn); \
-
-#define INSERT_FN(point, name, to_insert, ...) \
-  while (RtnMatchesName(rtn_name, name)) {\
-    INSERT_FN_HELPER(point, name, rtn, to_insert, __VA_ARGS__); \
-    break;\
-  }\
-
-
-#define INSERT_BEFORE_FN(name, to_insert, ...) \
-    INSERT_FN(IPOINT_BEFORE, name, to_insert, __VA_ARGS__)
-
-#define INSERT_BEFORE_1_SP(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_REG_VALUE, REG_STACK_PTR, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0)
-
-#define INSERT_BEFORE_2_SP(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_REG_VALUE, REG_STACK_PTR, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1)
-
-#define INSERT_BEFORE_0(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, IARG_END);
-
-#define INSERT_BEFORE_1(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0)
-
-#define INSERT_BEFORE_2(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1)
-
-#define INSERT_BEFORE_3(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2)
-
-#define INSERT_BEFORE_4(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 3)
-
-#define INSERT_BEFORE_5(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 3, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 4)
-
-#define INSERT_BEFORE_6(name, to_insert) \
-    INSERT_BEFORE_FN(name, to_insert, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 3, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 4, \
-                     IARG_FUNCARG_ENTRYPOINT_VALUE, 5)
-
-#define INSERT_AFTER_FN(name, to_insert, ...) \
-    INSERT_FN(IPOINT_AFTER, name, to_insert, __VA_ARGS__)
-
-#define INSERT_AFTER_0(name, to_insert) \
-    INSERT_AFTER_FN(name, to_insert, IARG_END)
-
-#define INSERT_AFTER_1(name, to_insert) \
-    INSERT_AFTER_FN(name, to_insert, IARG_FUNCRET_EXITPOINT_VALUE)
-
-
-#ifdef _MSC_VER
-void WrapStdCallFunc1(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc2(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc3(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc4(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc5(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc6(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc7(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc8(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc10(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 8,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 9,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-void WrapStdCallFunc11(RTN rtn, char *name, AFUNPTR replacement_func) {
-  if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
-    InformAboutFunctionWrap(rtn, name);
-    PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
-                                 CALLINGSTD_STDCALL,
-                                 "proto",
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG(uintptr_t),
-                                 PIN_PARG_END());
-    RTN_ReplaceSignature(rtn,
-                         AFUNPTR(replacement_func),
-                         IARG_PROTOTYPE, proto,
-                         IARG_THREAD_ID,
-                         IARG_INST_PTR,
-                         IARG_CONTEXT,
-                         IARG_ORIG_FUNCPTR,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 8,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 9,
-                         IARG_FUNCARG_ENTRYPOINT_VALUE, 10,
-                         IARG_END);
-    PROTO_Free(proto);
-  }
-}
-
-#endif
-
-static void MaybeInstrumentOneRoutine(IMG img, RTN rtn) {
-  if (IgnoreImage(img)) {
-    return;
-  }
-  string rtn_name = RTN_Name(rtn);
-  string img_name = IMG_Name(img);
-  if (debug_wrap) {
-    Printf("%s: %s %s pc=%p\n", __FUNCTION__, rtn_name.c_str(),
-           img_name.c_str(), RTN_Address(rtn));
-  }
-
-  // malloc/free/etc
-  const char *malloc_names[] = {
-    "malloc",
-#if defined(__GNUC__)
-    "_Znwm",
-    "_Znam",
-    "_Znwj",
-    "_Znaj",
-    "_ZnwmRKSt9nothrow_t",
-    "_ZnamRKSt9nothrow_t",
-    "_ZnwjRKSt9nothrow_t",
-    "_ZnajRKSt9nothrow_t",
-#endif
-#if defined(_MSC_VER)
-    "operator new",
-    "operator new[]",
-#endif  // _MSC_VER
-  };
-
-  const char *free_names[] = {
-    "free",
-#if defined(__GNUC__)
-    "_ZdaPv",
-    "_ZdlPv",
-    "_ZdlPvRKSt9nothrow_t",
-    "_ZdaPvRKSt9nothrow_t",
-#endif  // __GNUC__
-#if defined(_MSC_VER)
-    "operator delete",
-    "operator delete[]",
-#endif  // _MSC_VER
-  };
-
-  for (size_t i = 0; i < TS_ARRAY_SIZE(malloc_names); i++) {
-    const char *name = malloc_names[i];
-    INSERT_BEFORE_1_SP(name, Before_malloc);
-  }
-
-  for (size_t i = 0; i < TS_ARRAY_SIZE(free_names); i++) {
-    const char *name = free_names[i];
-    INSERT_BEFORE_1_SP(name, Before_free);
-  }
-
-  INSERT_BEFORE_2_SP("calloc", Before_calloc);
-  INSERT_BEFORE_2_SP("realloc", Before_realloc);
-
-#if defined(__GNUC__)
-  WrapFunc6(img, rtn, "mmap", (AFUNPTR)WRAP_NAME(mmap));
-  WrapFunc4(img, rtn, "munmap", (AFUNPTR)WRAP_NAME(munmap));
-
-  WrapFunc4(img, rtn, "lockf", (AFUNPTR)WRAP_NAME(lockf));
-  // pthread create/join
-  WrapFunc4(img, rtn, "pthread_create", (AFUNPTR)WRAP_NAME(pthread_create));
-  WrapFunc4(img, rtn, "pthread_join", (AFUNPTR)WRAP_NAME(pthread_join));
-  WrapFunc4(img, rtn, "fwrite", (AFUNPTR)WRAP_NAME(fwrite));
-
-  INSERT_FN(IPOINT_BEFORE, "start_thread",
-            Before_start_thread,
-            IARG_REG_VALUE, REG_STACK_PTR, IARG_END);
-
-   // pthread_cond_*
-  INSERT_BEFORE_1("pthread_cond_signal", Before_pthread_cond_signal);
-  WRAP4(pthread_cond_wait);
-  WRAP4(pthread_cond_timedwait);
-
-  // pthread_mutex_*
-  INSERT_BEFORE_1("pthread_mutex_init", Before_pthread_mutex_init);
-  INSERT_BEFORE_1("pthread_mutex_destroy", Before_pthread_mutex_destroy);
-  INSERT_BEFORE_1("pthread_mutex_unlock", Before_pthread_unlock);
-
-
-  INSERT_BEFORE_1_SP("pthread_mutex_lock", Before_pthread_mutex_lock);
-  WRAP4(pthread_mutex_trylock);
-  WRAP4(pthread_spin_lock);
-  WRAP4(pthread_spin_trylock);
-  WRAP4(pthread_spin_init);
-  WRAP4(pthread_spin_destroy);
-  WRAP4(pthread_spin_unlock);
-  WRAP4(pthread_rwlock_wrlock);
-  WRAP4(pthread_rwlock_rdlock);
-  WRAP4(pthread_rwlock_trywrlock);
-  WRAP4(pthread_rwlock_tryrdlock);
-
-  // pthread_rwlock_*
-  INSERT_BEFORE_1("pthread_rwlock_init", Before_pthread_rwlock_init);
-  INSERT_BEFORE_1("pthread_rwlock_destroy", Before_pthread_rwlock_destroy);
-  INSERT_BEFORE_1("pthread_rwlock_unlock", Before_pthread_unlock);
-
-  // pthread_barrier_*
-  WrapFunc4(img, rtn, "pthread_barrier_init",
-            (AFUNPTR)WRAP_NAME(pthread_barrier_init));
-  WrapFunc4(img, rtn, "pthread_barrier_wait",
-            (AFUNPTR)WRAP_NAME(pthread_barrier_wait));
-
-  // pthread_once
-  WrapFunc4(img, rtn, "pthread_once", (AFUNPTR)WRAP_NAME(pthread_once));
-
-  // sem_*
-  INSERT_AFTER_1("sem_open", After_sem_open);
-  INSERT_BEFORE_1("sem_post", Before_sem_post);
-  WRAP4(sem_wait);
-  WRAP4(sem_trywait);
-
-  INSERT_BEFORE_0("epoll_ctl", Before_epoll_ctl);
-  INSERT_AFTER_0("epoll_wait", After_epoll_wait);
-#endif  // __GNUC__
-
-#ifdef _MSC_VER
-  WrapStdCallFunc6(rtn, "CreateThread", (AFUNPTR)WRAP_NAME(CreateThread));
-  WRAPSTD1(ResumeThread);
-
-  INSERT_FN(IPOINT_BEFORE, "BaseThreadInitThunk",
-            Before_BaseThreadInitThunk,
-            IARG_REG_VALUE, REG_STACK_PTR, IARG_END);
-
-  INSERT_BEFORE_0("RtlExitUserThread", Before_RtlExitUserThread);
-  INSERT_BEFORE_0("ExitThread", Before_RtlExitUserThread);
-
-  WRAPSTD1(RtlInitializeCriticalSection);
-  WRAPSTD2(RtlInitializeCriticalSectionAndSpinCount);
-  WRAPSTD3(RtlInitializeCriticalSectionEx);
-  WRAPSTD1(RtlDeleteCriticalSection);
-  WRAPSTD1(RtlEnterCriticalSection);
-  WRAPSTD1(RtlTryEnterCriticalSection);
-  WRAPSTD1(RtlLeaveCriticalSection);
-  WRAPSTD7(DuplicateHandle);
-  WRAPSTD1(SetEvent);
-  WRAPSTD4(CreateSemaphoreA);
-  WRAPSTD4(CreateSemaphoreW);
-  WRAPSTD3(ReleaseSemaphore);
-
-  WRAPSTD1(RtlInterlockedPopEntrySList);
-  WRAPSTD2(RtlInterlockedPushEntrySList);
-
-#if 1
-  WRAPSTD1(RtlAcquireSRWLockExclusive);
-  WRAPSTD1(RtlAcquireSRWLockShared);
-  WRAPSTD1(RtlTryAcquireSRWLockExclusive);
-  WRAPSTD1(RtlTryAcquireSRWLockShared);
-  WRAPSTD1(RtlReleaseSRWLockExclusive);
-  WRAPSTD1(RtlReleaseSRWLockShared);
-  WRAPSTD1(RtlInitializeSRWLock);
-  // For some reason, RtlInitializeSRWLock is aliased to RtlInitializeSRWLock..
-  WrapStdCallFunc1(rtn, "RtlRunOnceInitialize",
-                   (AFUNPTR)Wrap_RtlInitializeSRWLock);
-
-  /* We haven't seen these syscalls used in the wild yet.
-  WRAPSTD2(RtlUpdateClonedSRWLock);
-  WRAPSTD1(RtlAcquireReleaseSRWLockExclusive);
-  WRAPSTD1(RtlUpdateClonedCriticalSection);
-  */
-
-  WRAPSTD1(RtlWakeConditionVariable);
-  WRAPSTD1(RtlWakeAllConditionVariable);
-  WRAPSTD4(RtlSleepConditionVariableSRW);
-  WRAPSTD3(RtlSleepConditionVariableCS);
-#endif  // if 1
-
-  WRAPSTD3(RtlQueueWorkItem);
-  WRAPSTD6(RegisterWaitForSingleObject);
-  WRAPSTD2(UnregisterWaitEx);
-
-  WRAPSTD3(WaitForSingleObjectEx);
-  WRAPSTD5(WaitForMultipleObjectsEx);
-
-  WrapStdCallFunc4(rtn, "VirtualAlloc", (AFUNPTR)(WRAP_NAME(VirtualAlloc)));
-  WrapStdCallFunc6(rtn, "ZwAllocateVirtualMemory", (AFUNPTR)(WRAP_NAME(ZwAllocateVirtualMemory)));
-  WrapStdCallFunc2(rtn, "GlobalAlloc", (AFUNPTR)WRAP_NAME(GlobalAlloc));
-//  WrapStdCallFunc3(rtn, "RtlAllocateHeap", (AFUNPTR) WRAP_NAME(AllocateHeap));
-//  WrapStdCallFunc3(rtn, "HeapCreate", (AFUNPTR) WRAP_NAME(HeapCreate));
-#endif  // _MSC_VER
-
-  // Annotations.
-  INSERT_BEFORE_4("AnnotateBenignRace", On_AnnotateBenignRace);
-  INSERT_BEFORE_5("AnnotateBenignRaceSized", On_AnnotateBenignRaceSized);
-  INSERT_BEFORE_5("WTFAnnotateBenignRaceSized", On_AnnotateBenignRaceSized);
-  INSERT_BEFORE_4("AnnotateExpectRace", On_AnnotateExpectRace);
-  INSERT_BEFORE_2("AnnotateFlushExpectedRaces", On_AnnotateFlushExpectedRaces);
-  INSERT_BEFORE_3("AnnotateTraceMemory", On_AnnotateTraceMemory);
-  INSERT_BEFORE_4("AnnotateNewMemory", On_AnnotateNewMemory);
-  INSERT_BEFORE_3("AnnotateNoOp", On_AnnotateNoOp);
-  INSERT_BEFORE_2("AnnotateFlushState", On_AnnotateFlushState);
-
-  INSERT_BEFORE_3("AnnotateCondVarWait", On_AnnotateCondVarWait);
-  INSERT_BEFORE_3("AnnotateCondVarSignal", On_AnnotateCondVarSignal);
-  INSERT_BEFORE_3("AnnotateCondVarSignalAll", On_AnnotateCondVarSignal);
-  INSERT_BEFORE_3("AnnotateHappensBefore", On_AnnotateHappensBefore);
-  INSERT_BEFORE_3("WTFAnnotateHappensBefore", On_AnnotateHappensBefore);
-  INSERT_BEFORE_3("AnnotateHappensAfter", On_AnnotateHappensAfter);
-  INSERT_BEFORE_3("WTFAnnotateHappensAfter", On_AnnotateHappensAfter);
-
-  INSERT_BEFORE_3("AnnotateEnableRaceDetection", On_AnnotateEnableRaceDetection);
-  INSERT_BEFORE_0("AnnotateIgnoreReadsBegin", On_AnnotateIgnoreReadsBegin);
-  INSERT_BEFORE_0("AnnotateIgnoreReadsEnd", On_AnnotateIgnoreReadsEnd);
-  INSERT_BEFORE_0("AnnotateIgnoreWritesBegin", On_AnnotateIgnoreWritesBegin);
-  INSERT_BEFORE_0("AnnotateIgnoreWritesEnd", On_AnnotateIgnoreWritesEnd);
-  INSERT_BEFORE_3("AnnotateThreadName", On_AnnotateThreadName);
-  INSERT_BEFORE_4("AnnotatePublishMemoryRange", On_AnnotatePublishMemoryRange);
-  INSERT_BEFORE_4("AnnotateUnpublishMemoryRange", On_AnnotateUnpublishMemoryRange);
-  INSERT_BEFORE_3("AnnotateMutexIsUsedAsCondVar", On_AnnotateMutexIsUsedAsCondVar);
-  INSERT_BEFORE_3("AnnotateMutexIsNotPHB", On_AnnotateMutexIsNotPhb);
-
-  INSERT_BEFORE_3("AnnotatePCQCreate", On_AnnotatePCQCreate);
-  INSERT_BEFORE_3("AnnotatePCQDestroy", On_AnnotatePCQDestroy);
-  INSERT_BEFORE_3("AnnotatePCQPut", On_AnnotatePCQPut);
-  INSERT_BEFORE_3("AnnotatePCQGet", On_AnnotatePCQGet);
-
-  INSERT_BEFORE_3("AnnotateRWLockCreate", On_AnnotateRWLockCreate);
-  INSERT_BEFORE_3("AnnotateRWLockDestroy", On_AnnotateRWLockDestroy);
-  INSERT_BEFORE_4("AnnotateRWLockAcquired", On_AnnotateRWLockAcquired);
-  INSERT_BEFORE_4("AnnotateRWLockReleased", On_AnnotateRWLockReleased);
-
-  // ThreadSanitizerQuery
-  WrapFunc4(img, rtn, "ThreadSanitizerQuery",
-            (AFUNPTR)WRAP_NAME(ThreadSanitizerQuery));
-  WrapFunc4(img, rtn, "RunningOnValgrind",
-            (AFUNPTR)WRAP_NAME(RunningOnValgrind));
-
-  // I/O
-  INSERT_BEFORE_0("write", Before_SignallingIOCall);
-  INSERT_BEFORE_0("unlink", Before_SignallingIOCall);
-  INSERT_BEFORE_0("rmdir", Before_SignallingIOCall);
-//  INSERT_BEFORE_0("send", Before_SignallingIOCall);
-  INSERT_AFTER_0("__read_nocancel", After_WaitingIOCall);
-  INSERT_AFTER_0("fopen", After_WaitingIOCall);
-  INSERT_AFTER_0("__fopen_internal", After_WaitingIOCall);
-  INSERT_AFTER_0("open", After_WaitingIOCall);
-  INSERT_AFTER_0("opendir", After_WaitingIOCall);
-//  INSERT_AFTER_0("recv", After_WaitingIOCall);
-
-  // strlen and friends.
-  // These wrappers will generate memory access events.
-  // So, if we don't want to get those events (e.g. memcpy inside
-  // ld.so or ntdll.dll) we don't wrap them and the regular
-  // ignore machinery will make sure we don't get the events.
-  if (ThreadSanitizerWantToInstrumentSblock(RTN_Address(rtn))) {
-    ReplaceFunc3(img, rtn, "memchr", (AFUNPTR)Replace_memchr);
-    ReplaceFunc3(img, rtn, "strchr", (AFUNPTR)Replace_strchr);
-    ReplaceFunc3(img, rtn, "index", (AFUNPTR)Replace_strchr);
-    ReplaceFunc3(img, rtn, "strchrnul", (AFUNPTR)Replace_strchrnul);
-    ReplaceFunc3(img, rtn, "strrchr", (AFUNPTR)Replace_strrchr);
-    ReplaceFunc3(img, rtn, "rindex", (AFUNPTR)Replace_strrchr);
-    ReplaceFunc3(img, rtn, "strlen", (AFUNPTR)Replace_strlen);
-    ReplaceFunc3(img, rtn, "strcmp", (AFUNPTR)Replace_strcmp);
-    ReplaceFunc3(img, rtn, "strncmp", (AFUNPTR)Replace_strncmp);
-    ReplaceFunc3(img, rtn, "memcpy", (AFUNPTR)Replace_memcpy);
-    ReplaceFunc3(img, rtn, "memcmp", (AFUNPTR)Replace_memcmp);
-    ReplaceFunc3(img, rtn, "memmove", (AFUNPTR)Replace_memmove);
-    ReplaceFunc3(img, rtn, "strcpy", (AFUNPTR)Replace_strcpy);
-    ReplaceFunc3(img, rtn, "strncpy", (AFUNPTR)Replace_strncpy);
-    ReplaceFunc3(img, rtn, "strcat", (AFUNPTR)Replace_strcat);
-    ReplaceFunc3(img, rtn, "stpcpy", (AFUNPTR)Replace_stpcpy);
-  }
-
-  // __cxa_guard_acquire / __cxa_guard_release
-  INSERT_BEFORE_1("__cxa_guard_acquire", Before_cxa_guard_acquire);
-  INSERT_AFTER_1("__cxa_guard_acquire", After_cxa_guard_acquire);
-  INSERT_AFTER_0("__cxa_guard_release", After_cxa_guard_release);
-
-  INSERT_BEFORE_0("atexit", On_atexit);
-  INSERT_BEFORE_0("exit", On_exit);
-}
-
-// Pin calls this function every time a new img is loaded.
-static void CallbackForIMG(IMG img, void *v) {
-  if (debug_wrap) {
-    Printf("Started CallbackForIMG %s\n", IMG_Name(img).c_str());
-  }
-
-  string img_name = IMG_Name(img);
-  for (SEC sec = IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec)) {
-    for (RTN rtn = SEC_RtnHead(sec); RTN_Valid(rtn); rtn = RTN_Next(rtn)) {
-      MaybeInstrumentOneRoutine(img, rtn);
-    }
-  }
-  // In DEBUG_MODE check that we have the debug symbols in the Windows guts.
-  // We should work w/o them too.
-  // TODO(timurrrr): I doubt the problem is the missing symbols.
-  // I have a strong gut feeling that this syscall was added
-  // in Vista but only used since Windows 7. We had its wrapper wrong
-  // (found on W7) but the Vista build was fine for months.
-  // Also, we wrap RtlReleaseSRWLock*, so our TSan assertions would have been
-  // broken if RtlTryAcquireSRWLock* wasn't wrapped - and we haven't see this.
-  if (DEBUG_MODE && img_name.find("ntdll.dll") != string::npos) {
-    if (g_wrapped_functions.count("RtlTryAcquireSRWLockExclusive") == 0) {
-      Printf("WARNING: Debug symbols for ntdll.dll not found.\n");
-    }
-  }
-}
-
-// Returns:
-// TRUE
-// If user is interested to inject Pin (and tool) into child/exec-ed process
-// FALSE
-// If user is not interested to inject Pin (and tool) into child/exec-ed process
-static BOOL CallbackForExec(CHILD_PROCESS childProcess, VOID *val) {
-  int argc = 0;
-  const CHAR *const * argv = NULL;
-  CHILD_PROCESS_GetCommandLine(childProcess, &argc, &argv);
-  CHECK(argc > 0);
-  CHECK(argv);
-  bool follow = G_flags->trace_children;
-  if (DEBUG_MODE) {
-    Printf("CallbackForExec: follow=%d: ", follow);
-    for (int i = 0; i < argc; i++) {
-      Printf("%s ", argv[i]);
-    }
-  }
-  Printf("\n");
-  return follow;
-}
-
-//--------- Fini ---------- {{{1
-static void CallbackForFini(INT32 code, void *v) {
-  DumpEvent(0, THR_END, 0, 0, 0, 0);
-  ThreadSanitizerFini();
-  if (g_race_verifier_active) {
-    RaceVerifierFini();
-  }
-  if (G_flags->show_stats) {
-    TraceInfo::PrintTraceProfile();
-  }
-  if (G_flags->error_exitcode && GetNumberOfFoundErrors() > 0) {
-    exit(G_flags->error_exitcode);
-  }
-}
-
-//--------- Call Coverage ----------------- {{{1
-// A simplistic call coverage tool.
-// Outputs all pairs <call_site,call_target>.
-
-typedef set<pair<uintptr_t, uintptr_t> > CallCoverageSet;
-static CallCoverageSet *call_coverage_set;
-
-static map<uintptr_t, string> *function_names_map;
-static uintptr_t symbolized_functions_cache[1023];
-static pair<uintptr_t, uintptr_t> registered_pairs_cache[1023];
-
-static void symbolize_pc(uintptr_t pc) {
-  // Check a simple cache if we already symbolized this pc (racey).
-  size_t idx = pc % TS_ARRAY_SIZE(symbolized_functions_cache);
-  if (symbolized_functions_cache[idx] == pc) return;
-
-  ScopedReentrantClientLock lock(__LINE__);
-  CHECK(function_names_map);
-  if (function_names_map->count(pc) == 0) {
-    (*function_names_map)[pc] = PcToRtnName(pc, false);
-  }
-  symbolized_functions_cache[idx] = pc;
-}
-
-static void CallCoverageRegisterCall(uintptr_t from, uintptr_t to) {
-  symbolize_pc(from);
-  symbolize_pc(to);
-
-  // Check if we already registered this pair (racey).
-  size_t idx = (from ^ to) % TS_ARRAY_SIZE(registered_pairs_cache);
-  if (registered_pairs_cache[idx] == make_pair(from,to)) return;
-
-  ScopedReentrantClientLock lock(__LINE__);
-  call_coverage_set->insert(make_pair(from, to));
-  registered_pairs_cache[idx] = make_pair(from,to);
-}
-
-static void CallCoverageCallbackForTRACE(TRACE trace, void *v) {
-  RTN rtn = TRACE_Rtn(trace);
-  if (RTN_Valid(rtn)) {
-    SEC sec = RTN_Sec(rtn);
-    IMG img = SEC_Img(sec);
-    string img_name = IMG_Name(img);
-    // Don't instrument system libraries.
-    if (img_name.find("/usr/") == 0) return;
-  }
-
-  if (call_coverage_set == NULL) {
-    call_coverage_set = new CallCoverageSet;
-    function_names_map = new map<uintptr_t, string>;
-  }
-  for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
-    INS ins = BBL_InsTail(bbl);
-    if (!INS_IsProcedureCall(ins) || INS_IsSyscall(ins)) continue;
-    if (INS_IsDirectBranchOrCall(ins)) {
-      // If <from, to> is know at instrumentation time, don't instrument.
-      ADDRINT to = INS_DirectBranchOrCallTargetAddress(ins);
-      ADDRINT from = INS_Address(ins);
-      CallCoverageRegisterCall(from, to);
-    } else {
-      // target is dynamic. Need to instrument.
-      INS_InsertCall(ins, IPOINT_BEFORE,
-                     (AFUNPTR)CallCoverageRegisterCall,
-                     IARG_INST_PTR,
-                     IARG_BRANCH_TARGET_ADDR,
-                     IARG_END);
-    }
-  }
-}
-
-// Output all <from,to> pairs.
-static void CallCoverageCallbackForFini(INT32 code, void *v) {
-  CHECK(call_coverage_set);
-  CHECK(function_names_map);
-  for (CallCoverageSet::iterator it = call_coverage_set->begin();
-       it != call_coverage_set->end(); ++it) {
-    string from_name = (*function_names_map)[it->first];
-    string to_name   = (*function_names_map)[it->second];
-    if (to_name == ".plt" || to_name == "") continue;
-    Printf("CallCoverage: %s => %s\n", from_name.c_str(), to_name.c_str());
-  }
-}
-
-//--------- Main -------------------------- {{{1
-int main(INT32 argc, CHAR **argv) {
-  PIN_Init(argc, argv);
-  PIN_InitSymbols();
-  G_out = stderr;
-
-  // Init ThreadSanitizer.
-  int first_param = 1;
-  // skip until '-t something.so'.
-  for (; first_param < argc && argv[first_param] != string("-t");
-       first_param++) {
-  }
-  first_param += 2;
-  vector<string> args;
-  for (; first_param < argc; first_param++) {
-    string param = argv[first_param];
-    if (param == "--") break;
-    if (param == "-short_name") continue;
-    if (param == "-slow_asserts") continue;
-    if (param == "1") continue;
-    args.push_back(param);
-  }
-
-  G_flags = new FLAGS;
-  ThreadSanitizerParseFlags(&args);
-
-  if (G_flags->dry_run >= 2) {
-    PIN_StartProgram();
-    return 0;
-  }
-
-  FILE *socket_output = OpenSocketForWriting(G_flags->log_file);
-  if (socket_output) {
-    G_out = socket_output;
-  } else if (!G_flags->log_file.empty()) {
-    // Replace %p with tool PID
-    string fname = G_flags->log_file;
-    char pid_str[100] = "";
-    sprintf(pid_str, "%u", getpid());
-    while (fname.find("%p") != fname.npos)
-      fname.replace(fname.find("%p"), 2, pid_str);
-
-    G_out = fopen(fname.c_str(), "w");
-    CHECK(G_out);
-  }
-
-  ThreadSanitizerInit();
-
-  if (G_flags->call_coverage) {
-    PIN_AddFiniFunction(CallCoverageCallbackForFini, 0);
-    TRACE_AddInstrumentFunction(CallCoverageCallbackForTRACE, 0);
-    PIN_StartProgram();
-    return 0;
-  }
-
-  tls_reg = PIN_ClaimToolRegister();
-  CHECK(REG_valid(tls_reg));
-#if _MSC_VER
-  g_windows_thread_pool_calback_set = new unordered_set<uintptr_t>;
-  g_windows_thread_pool_wait_object_map = new unordered_map<uintptr_t, uintptr_t>;
-#endif
-
-  // Set up PIN callbacks.
-  PIN_AddThreadStartFunction(CallbackForThreadStart, 0);
-  PIN_AddThreadFiniFunction(CallbackForThreadFini, 0);
-  PIN_AddFiniFunction(CallbackForFini, 0);
-  IMG_AddInstrumentFunction(CallbackForIMG, 0);
-  TRACE_AddInstrumentFunction(CallbackForTRACE, 0);
-  PIN_AddFollowChildProcessFunction(CallbackForExec, NULL);
-
-  Report("ThreadSanitizerPin r%s pin %d: %s\n",
-         TS_VERSION, PIN_BUILD_NUMBER,
-         G_flags->pure_happens_before ? "hybrid=no" : "hybrid=yes");
-  if (DEBUG_MODE) {
-    Report("INFO: Debug build\n");
-  }
-
-  if (g_race_verifier_active) {
-    RaceVerifierInit(G_flags->race_verifier, G_flags->race_verifier_extra);
-    global_ignore = true;
-  }
-
-  // Fire!
-  PIN_StartProgram();
-  return 0;
-}
-
-//--------- Questions about PIN -------------------------- {{{1
-/* Questions about PIN:
-
-  - Names (e.g. pthread_create@... __pthread_mutex_unlock)
-  - How to get name of a global var by it's address?
-  - How to get stack pointer at thread creation?
-  - How to get a stack trace (other than intercepting calls, entries, exits)
-  - assert with full stack trace?
-  */
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/ts_race_verifier.cc b/tsan/ts_race_verifier.cc
deleted file mode 100644
index a2e9fd6..0000000
--- a/tsan/ts_race_verifier.cc
+++ /dev/null
@@ -1,445 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Evgeniy Stepanov.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <vector>
-#include <set>
-#include <iterator>
-#include <algorithm>
-
-#include "ts_lock.h"
-#include "ts_util.h"
-#include "ts_race_verifier.h"
-#include "thread_sanitizer.h"
-
-struct PossibleRace {
-  PossibleRace() : pc(0), reported(false) {}
-  // racy instruction
-  uintptr_t pc;
-  // concurrent traces
-  vector<uintptr_t> traces;
-  // report text
-  string report;
-  // whether this race has already been reported
-  bool reported;
-};
-
-// pc -> race
-static map<uintptr_t, PossibleRace*>* races_map;
-
-// Data about a call site.
-struct CallSite {
-  int thread_id;
-  uintptr_t pc;
-};
-
-struct TypedCallSites {
-  vector<CallSite> reads;
-  vector<CallSite> writes;
-};
-
-// data address -> ([write callsites], [read callsites])
-typedef map<uintptr_t, TypedCallSites> AddressMap;
-
-static TSLock racecheck_lock;
-static AddressMap* racecheck_map;
-// data addresses that are ignored (they have already been reported)
-static set<uintptr_t>* ignore_addresses;
-
-// starting pc of the trace -> visit count
-// used to reduce the sleep time for hot traces
-typedef map<uintptr_t, int> VisitCountMap;
-static VisitCountMap* visit_count_map;
-
-static int n_reports;
-
-/**
- * Given max and min pc of a trace (both inclusive!), returns whether this trace
- * is interesting to us at all (via the return value), and whether it should be
- * instrumented fully (*instrument_pc=0), or 1 instruction only. In the latter
- * case, *instrument_pc contains the address of the said instruction.
- */
-bool RaceVerifierGetAddresses(uintptr_t min_pc, uintptr_t max_pc,
-    uintptr_t* instrument_pc) {
-  uintptr_t pc = 0;
-  for (map<uintptr_t, PossibleRace*>::iterator it = races_map->begin();
-       it != races_map->end(); ++it) {
-    PossibleRace* race = it->second;
-    if (race->reported)
-      continue;
-    if (race->pc >= min_pc && race->pc <= max_pc) {
-      if (pc) {
-        // Two race candidates in one trace. Just instrument it fully.
-        *instrument_pc = 0;
-        return true;
-      }
-      pc = race->pc;
-    }
-    for (vector<uintptr_t>::iterator it2 = race->traces.begin();
-         it2 != race->traces.end(); ++it2) {
-      if (*it2 >= min_pc && *it2 <= max_pc) {
-        *instrument_pc = 0;
-        return true;
-      }
-    }
-  }
-  *instrument_pc = pc;
-  return !!pc;
-}
-
-static void UpdateSummary() {
-  if (!G_flags->summary_file.empty()) {
-    char buff[100];
-    snprintf(buff, sizeof(buff),
-	     "RaceVerifier: %d report(s) verified\n", n_reports);
-    // We overwrite the contents of this file with the new summary.
-    // We don't do that at the end because even if we crash later
-    // we will already have the summary.
-    OpenFileWriteStringAndClose(G_flags->summary_file, buff);
-  }
-}
-
-/* Build and print a race report for a data address. Does not print stack traces
-   and symbols and all the fancy stuff - we don't have that info. Used when we
-   don't have a ready report - for unexpected races and for
-   --race-verifier-extra races.
-
-   racecheck_lock must be held by the current thread.
-*/
-static void PrintRaceReportEmpty(uintptr_t addr) {
-  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
-  vector<CallSite>& writes = typedCallSites->writes;
-  vector<CallSite>& reads = typedCallSites->reads;
-  for (vector<CallSite>::const_iterator it = writes.begin();
-       it != writes.end(); ++ it) {
-    Printf("  write at %p\n", it->pc);
-  }
-  for (vector<CallSite>::const_iterator it = reads.begin();
-       it != reads.end(); ++ it) {
-    Printf("  read at %p\n", it->pc);
-  }
-}
-
-/* Find a PossibleRace that matches current accesses (racecheck_map) to the
-   given data address.
-
-   racecheck_lock must be held by the current thread.
- */
-static PossibleRace* FindRaceForAddr(uintptr_t addr) {
-  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
-  vector<CallSite>& writes = typedCallSites->writes;
-  vector<CallSite>& reads = typedCallSites->reads;
-  for (vector<CallSite>::const_iterator it = writes.begin();
-       it != writes.end(); ++ it) {
-    map<uintptr_t, PossibleRace*>::iterator it2 = races_map->find(it->pc);
-    if (it2 != races_map->end())
-      return it2->second;
-  }
-  for (vector<CallSite>::const_iterator it = reads.begin();
-       it != reads.end(); ++ it) {
-    map<uintptr_t, PossibleRace*>::iterator it2 = races_map->find(it->pc);
-    if (it2 != races_map->end())
-      return it2->second;
-  }
-  return NULL;
-}
-
-/* Prints a race report for the given data address, either finding one in a
-   matching PossibleRace, or just printing pc's of the mops.
-
-   racecheck_lock must be held by the current thread.
-*/
-static void PrintRaceReport(uintptr_t addr) {
-  PossibleRace* race = FindRaceForAddr(addr);
-  if (race) {
-    ExpectedRace* expected_race = ThreadSanitizerFindExpectedRace(addr);
-    if (expected_race)
-      expected_race->count++;
-    bool is_expected = !!expected_race;
-    bool is_unverifiable = is_expected && !expected_race->is_verifiable;
-
-    if (is_expected && !is_unverifiable && !G_flags->show_expected_races)
-      return;
-
-    if (is_unverifiable)
-      Printf("WARNING: Confirmed a race that was marked as UNVERIFIABLE:\n");
-    else
-      Printf("WARNING: Confirmed a race:\n");
-    const string& report = race->report;
-    if (report.empty()) {
-      PrintRaceReportEmpty(addr);
-    } else {
-      Printf("%s", report.c_str());
-    }
-    // Suppress future reports for this race.
-    race->reported = true;
-    ignore_addresses->insert(addr);
-
-    n_reports++;
-  } else {
-    Printf("Warning: unexpected race found!\n");
-    PrintRaceReportEmpty(addr);
-
-    n_reports ++;
-  }
-  UpdateSummary();
-}
-
-/**
- * This function is called before the mop delay.
- * @param thread_id Thread id.
- * @param addr Data address.
- * @param pc Instruction pc.
- * @param is_w Whether this is a write (true) or a read (false).
- * @return True if this access is interesting to us at all. If true, the caller
- *     should delay and then call RaceVerifierEndAccess. If false, it should do
- *     nothing more for this mop.
- */
-bool RaceVerifierStartAccess(int thread_id, uintptr_t addr, uintptr_t pc,
-    bool is_w) {
-  CallSite callSite;
-  callSite.thread_id = thread_id;
-  callSite.pc = pc;
-  racecheck_lock.Lock();
-
-  if (debug_race_verifier)
-    Printf("[%d] pc %p %s addr %p start\n", thread_id, pc,
-        is_w ? "write" : "read", addr);
-
-  if (ignore_addresses->count(addr)) {
-    racecheck_lock.Unlock();
-    return false;
-  }
-
-  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
-  vector<CallSite>& writes = typedCallSites->writes;
-  vector<CallSite>& reads = typedCallSites->reads;
-  (is_w ? writes : reads).push_back(callSite);
-  if (writes.size() > 0 && writes.size() + reads.size() > 1) {
-    bool is_race = false;
-    for (size_t i = 0; !is_race && i < writes.size(); ++i) {
-      for (size_t j = 0; !is_race && j < writes.size(); ++j)
-        if (writes[i].thread_id != writes[j].thread_id)
-          is_race = true;
-      for (size_t j = 0; !is_race && j < reads.size(); ++j)
-        if (writes[i].thread_id != reads[j].thread_id)
-          is_race = true;
-    }
-    if (is_race)
-      PrintRaceReport(addr);
-  }
-  racecheck_lock.Unlock();
-  return true;
-}
-
-/* This function is called after the mop delay, only if RaceVerifierStartAccess
-   returned true. The arguments are exactly the same. */
-void RaceVerifierEndAccess(int thread_id, uintptr_t addr, uintptr_t pc,
-    bool is_w) {
-  racecheck_lock.Lock();
-
-  if (debug_race_verifier)
-    Printf("[%d] pc %p %s addr %p end\n", thread_id, pc,
-        is_w ? "write" : "read", addr);
-  if (ignore_addresses->count(addr)) {
-    racecheck_lock.Unlock();
-    return;
-  }
-
-  TypedCallSites* typedCallSites = &(*racecheck_map)[addr];
-  vector<CallSite>& vec =
-      is_w ? typedCallSites->writes : typedCallSites->reads;
-  for (int i = vec.size() - 1; i >= 0; --i) {
-    if (vec[i].thread_id == thread_id) {
-      vec.erase(vec.begin() + i);
-      break;
-    }
-  }
-  racecheck_lock.Unlock();
-}
-
-/* Parse a race description that appears in TSan logs after the words
-   "Race verifier data: ", not including the said words. It looks like
-   "pc,trace[,trace]...", without spaces. */
-static PossibleRace* ParseRaceInfo(const string& raceInfo) {
-  PossibleRace* race = new PossibleRace();
-  const char* p = raceInfo.c_str();
-  while (true) {
-    char* end;
-    uintptr_t addr = my_strtol(p, &end, 16);
-    if (p == end) {
-      Printf("Parse error: %s\n", p);
-      exit(1);
-    }
-    if (!race->pc)
-      race->pc = addr;
-    else
-      race->traces.push_back(addr);
-    while (*end == '\n' || *end == '\r')
-      ++end;
-    if (*end == '\0') {
-      // raceInfo already ends with \n
-      Printf("Possible race: %s", raceInfo.c_str());
-      return race;
-    }
-    if (*end != ',') {
-      Printf("Parse error: comma expected: %s\n", end);
-      delete race;
-      return NULL;
-    }
-    p = end + 1;
-  }
-}
-
-/* Parse a race description and add it to races_map. */
-static void RaceVerifierParseRaceInfo(const string& raceInfo) {
-  PossibleRace* race = ParseRaceInfo(raceInfo);
-  if (race)
-    (*races_map)[race->pc] = race;
-  else
-    Printf("Bad raceInfo: %s\n", raceInfo.c_str());
-}
-
-
-class StringStream {
- public:
-  StringStream(const string &s) : s_(s), data_(s.c_str()), p_(data_) {}
-
-  bool Eof() {
-    return !*p_;
-  }
-
-  string NextLine() {
-    const char* first = p_;
-    while (*p_ && *p_ != '\n') {
-      ++p_;
-    }
-    if (*p_)
-      ++p_;
-    return string(first, p_ - first);
-  }
-
- private:
-  const string& s_;
-  const char* data_;
-  const char* p_;
-};
-
-/* Parse a TSan log and add all race verifier info's from it to our storage of
-   possible races. */
-static void RaceVerifierParseFile(const string& fileName) {
-  Printf("Reading race data from %s\n", fileName.c_str());
-  const string RACEINFO_MARKER = "Race verifier data: ";
-  string log = ReadFileToString(fileName, true /* die_if_failed */);
-  StringStream ss(log);
-  string* desc = NULL;
-  int count = 0;
-  while (!ss.Eof()) {
-    string line = ss.NextLine();
-    size_t pos;
-    if ((line.find("WARNING: Possible data race during") !=
-            string::npos) ||
-        (line.find("WARNING: Expected data race during") !=
-            string::npos)) {
-      desc = new string();
-      (*desc) += line;
-    } else if ((pos = line.find(RACEINFO_MARKER)) != string::npos) {
-      pos += RACEINFO_MARKER.size();
-      string raceInfo = line.substr(pos);
-      PossibleRace* race = ParseRaceInfo(raceInfo);
-      (*desc) += "}}}\n";
-      race->report = *desc;
-      (*races_map)[race->pc] = race;
-      count ++;
-      delete desc;
-      desc = NULL;
-    } else if (desc) {
-      (*desc) += line;
-    }
-  }
-  Printf("Got %d possible races\n", count);
-}
-
-/**
- * Return the time to sleep for the given trace.
- * @param trace_pc The starting pc of the trace.
- * @return Time to sleep in ms, or 0 if this trace should be ignored.
- */
-int RaceVerifierGetSleepTime(uintptr_t trace_pc) {
-  racecheck_lock.Lock();
-  int visit_count = ++(*visit_count_map)[trace_pc];
-  int tm;
-  if (visit_count < 20) {
-    tm = G_flags->race_verifier_sleep_ms;
-  } else if (visit_count < 200) {
-    tm = G_flags->race_verifier_sleep_ms / 10;
-  } else {
-    tm = 0;
-  }
-  if (debug_race_verifier) {
-    if (visit_count == 20) {
-      Printf("RaceVerifier: Trace %x: sleep time reduced.\n", trace_pc);
-    } else if (visit_count == 200) {
-      Printf("RaceVerifier: Trace %x: ignored.\n", trace_pc);
-    }
-  }
-  racecheck_lock.Unlock();
-  return tm;
-}
-
-/**
- * Init the race verifier. Should be called exactly once before any other
- * functions in this file.
- * @param fileNames Names of TSan log to parse.
- * @param raceInfos Additional race description strings.
- */
-void RaceVerifierInit(const vector<string>& fileNames,
-    const vector<string>& raceInfos) {
-  races_map = new map<uintptr_t, PossibleRace*>();
-  racecheck_map = new AddressMap();
-  visit_count_map = new VisitCountMap();
-  ignore_addresses = new set<uintptr_t>();
-
-  for (vector<string>::const_iterator it = fileNames.begin();
-       it != fileNames.end(); ++it) {
-    RaceVerifierParseFile(*it);
-  }
-  for (vector<string>::const_iterator it = raceInfos.begin();
-       it != raceInfos.end(); ++it) {
-    RaceVerifierParseRaceInfo(*it);
-  }
-}
-
-void RaceVerifierFini() {
-  Report("RaceVerifier summary: verified %d race(s)\n", n_reports);
-  int n_errors = GetNumberOfFoundErrors();
-  SetNumberOfFoundErrors(n_errors + n_reports);
-}
diff --git a/tsan/ts_race_verifier.h b/tsan/ts_race_verifier.h
deleted file mode 100644
index ce273d2..0000000
--- a/tsan/ts_race_verifier.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Evgeniy Stepanov.
-
-/*
-  RaceVerifier is a tool for verifying race reports produced by ThreadSanitizer.
-  It works by adding time delays after potentially racey instructions and making
-  sure that they are executed simultaneously.
-
-  To use RaceVerifier, save the stderr log of a ThreadSanitizer run to a file
-  and run tsan again with --race-verifier=<log file name> option.
- */
-#ifndef TS_RACE_VERIFIER_H_
-#define TS_RACE_VERIFIER_H_
-
-bool RaceVerifierGetAddresses(uintptr_t min_pc, uintptr_t max_pc,
-    uintptr_t* instrument_pc);
-bool RaceVerifierStartAccess(int thread_id, uintptr_t addr, uintptr_t pc,
-    bool is_w);
-void RaceVerifierEndAccess(int thread_id, uintptr_t addr, uintptr_t pc,
-    bool is_w);
-int RaceVerifierGetSleepTime(uintptr_t trace_pc);
-
-void RaceVerifierInit(const std::vector<std::string>& fileNames,
-    const std::vector<std::string>& raceInfos);
-void RaceVerifierFini();
-
-#endif // TS_RACE_VERIFIER_H_
diff --git a/tsan/ts_replace.h b/tsan/ts_replace.h
deleted file mode 100644
index 3737b5c..0000000
--- a/tsan/ts_replace.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-//
-// Some libc functions are implemented in a way unfriendly to race detectors
-// and memcheck-like tools.
-// E.g. strlen() may read up to 7 bytes past the allocated buffer.
-// To avoid false positives in these functions, the tool needs to replace these
-// funcions with simpler implementation.
-//
-// The includer must define these macros:
-// REPORT_WRITE_RANGE, REPORT_READ_RANGE, EXTRA_REPLACE_PARAMS,
-// EXTRA_REPLACE_ARGS, NOINLINE
-// See ts_valgrind_intercepts.c and ts_pin.cc.
-
-#ifndef TS_REPLACE_H_
-#define TS_REPLACE_H_
-
-static NOINLINE char *Replace_memchr(EXTRA_REPLACE_PARAMS const char *s,
-                                     int c, size_t n) {
-  size_t i;
-  char *ret = 0;
-  for (i = 0; i < n; i++) {
-    if (s[i] == (char)c) {
-      ret = (char*)(&s[i]);
-      break;
-    }
-  }
-  REPORT_READ_RANGE(s, ret ? i + 1 : n);
-  return ret;
-}
-
-static NOINLINE char *Replace_strchr(EXTRA_REPLACE_PARAMS const char *s,
-                                     int c) {
-  size_t i;
-  char *ret = 0;
-  for (i = 0; ; i++) {
-    if (s[i] == (char)c) {
-      ret = (char*)(&s[i]);
-      break;
-    }
-    if (s[i] == 0) break;
-  }
-  REPORT_READ_RANGE(s, i + 1);
-  return ret;
-}
-
-static NOINLINE char *Replace_strchrnul(EXTRA_REPLACE_PARAMS const char *s,
-                                        int c) {
-  size_t i;
-  char *ret;
-  for (i = 0; ; i++) {
-    if (s[i] == (char)c || s[i] == 0) {
-      ret = (char*)(&s[i]);
-      break;
-    }
-  }
-  REPORT_READ_RANGE(s, i + 1);
-  return ret;
-}
-
-static NOINLINE char *Replace_strrchr(EXTRA_REPLACE_PARAMS const char *s,
-                                      int c) {
-  char* ret = 0;
-  size_t i;
-  for (i = 0; ; i++) {
-    if (s[i] == (char)c) {
-      ret = (char*)&s[i];
-    }
-    if (s[i] == 0) break;
-  }
-  REPORT_READ_RANGE(s, i + 1);
-  return ret;
-}
-
-static NOINLINE size_t Replace_strlen(EXTRA_REPLACE_PARAMS const char *s) {
-  size_t i = 0;
-  for (i = 0; s[i]; i++) {
-  }
-  REPORT_READ_RANGE(s, i + 1);
-  return i;
-}
-
-static NOINLINE char *Replace_memcpy(EXTRA_REPLACE_PARAMS char *dst,
-                                     const char *src, size_t len) {
-  size_t i;
-  for (i = 0; i < len; i++) {
-    dst[i] = src[i];
-  }
-  REPORT_READ_RANGE(src, i);
-  REPORT_WRITE_RANGE(dst, i);
-  return dst;
-}
-
-static NOINLINE char *Replace_memmove(EXTRA_REPLACE_PARAMS char *dst,
-                                     const char *src, size_t len) {
-
-  size_t i;
-  if (dst < src) {
-    for (i = 0; i < len; i++) {
-      dst[i] = src[i];
-    }
-  } else {
-    for (i = 0; i < len; i++) {
-      dst[len - i - 1] = src[len - i - 1];
-    }
-  }
-  REPORT_READ_RANGE(src, i);
-  REPORT_WRITE_RANGE(dst, i);
-  return dst;
-}
-
-static NOINLINE int Replace_memcmp(EXTRA_REPLACE_PARAMS const unsigned char *s1,
-                                     const unsigned char *s2, size_t len) {
-  size_t i;
-  int res = 0;
-  for (i = 0; i < len; i++) {
-    if (s1[i] != s2[i]) {
-      res = (int)s1[i] - (int)s2[i];
-      break;
-    }
-  }
-  REPORT_READ_RANGE(s1, min(i + 1, len));
-  REPORT_READ_RANGE(s2, min(i + 1, len));
-  return res;
-}
-
-static NOINLINE char *Replace_strcpy(EXTRA_REPLACE_PARAMS char *dst,
-                                     const char *src) {
-  size_t i;
-  for (i = 0; src[i]; i++) {
-    dst[i] = src[i];
-  }
-  dst[i] = 0;
-  REPORT_READ_RANGE(src, i + 1);
-  REPORT_WRITE_RANGE(dst, i + 1);
-  return dst;
-}
-
-static NOINLINE char *Replace_stpcpy(EXTRA_REPLACE_PARAMS char *dst,
-                                     const char *src) {
-  size_t i;
-  for (i = 0; src[i]; i++) {
-    dst[i] = src[i];
-  }
-  dst[i] = 0;
-  REPORT_READ_RANGE(src, i + 1);
-  REPORT_WRITE_RANGE(dst, i + 1);
-  return dst + i;
-}
-
-static NOINLINE char *Replace_strncpy(EXTRA_REPLACE_PARAMS char *dst,
-                                     const char *src, size_t n) {
-  size_t i;
-  for (i = 0; i < n; i++) {
-    dst[i] = src[i];
-    if (src[i] == 0) break;
-  }
-  REPORT_READ_RANGE(src, min(i + 1, n));
-  while (i < n) {
-    dst[i] = 0;
-    i++;
-  }
-  REPORT_WRITE_RANGE(dst, n);
-  return dst;
-}
-
-
-static NOINLINE int Replace_strcmp(EXTRA_REPLACE_PARAMS const char *s1,
-                                   const char *s2) {
-  unsigned char c1;
-  unsigned char c2;
-  size_t i;
-  for (i = 0; ; i++) {
-    c1 = (unsigned char)s1[i];
-    c2 = (unsigned char)s2[i];
-    if (c1 != c2) break;
-    if (c1 == 0) break;
-  }
-  REPORT_READ_RANGE(s1, i+1);
-  REPORT_READ_RANGE(s2, i+1);
-  if (c1 < c2) return -1;
-  if (c1 > c2) return 1;
-  return 0;
-}
-
-static NOINLINE int Replace_strncmp(EXTRA_REPLACE_PARAMS const char *s1,
-                                    const char *s2, size_t n) {
-  unsigned char c1 = 0;
-  unsigned char c2 = 0;
-  size_t i;
-  for (i = 0; i < n; i++) {
-    c1 = (unsigned char)s1[i];
-    c2 = (unsigned char)s2[i];
-    if (c1 != c2) break;
-    if (c1 == 0) break;
-  }
-  REPORT_READ_RANGE(s1, min(i + 1, n));
-  REPORT_READ_RANGE(s2, min(i + 1, n));
-  if (c1 < c2) return -1;
-  if (c1 > c2) return 1;
-  return 0;
-}
-
-static NOINLINE char *Replace_strcat(EXTRA_REPLACE_PARAMS char *dest,
-                                     const char *src) {
-  size_t dest_len = Replace_strlen(EXTRA_REPLACE_ARGS dest);
-  Replace_strcpy(EXTRA_REPLACE_ARGS dest + dest_len, src);
-  return dest;
-}
-
-#if defined(TS_VALGRIND)
-// Read every byte in the memory range.
-static NOINLINE void ReadMemory(const void* p, size_t size) {
-  const volatile char* start = (const volatile char*)p;
-  const volatile char* end = start + size;
-  volatile char tmp = 0;
-  for (; start < end; ++start) {
-    // If we just read the bytes, Valgrind will optimize it out.
-    tmp ^= *start;
-  }
-}
-
-// Read every byte in the null-terminated string.
-static NOINLINE void ReadString(const char* s) {
-  const volatile char* p = (const volatile char*)s;
-  volatile char tmp = 0;
-  char c;
-  for (; (c = *p); ++p) {
-    tmp ^= c;
-  }
-}
-#endif   // TS_VALGRIND
-
-#endif  // TS_REPLACE_H_
diff --git a/tsan/ts_simple_cache.h b/tsan/ts_simple_cache.h
deleted file mode 100644
index f5b65e8..0000000
--- a/tsan/ts_simple_cache.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-#ifndef TS_SIMPLE_CACHE_
-#define TS_SIMPLE_CACHE_
-
-#include "ts_util.h"
-
-// Few simple 'cache' classes.
-// -------- PtrToBoolCache ------ {{{1
-// Maps a pointer to a boolean.
-template <int kSize>
-class PtrToBoolCache {
- public:
-  PtrToBoolCache() {
-    Flush();
-  }
-  void Flush() {
-    memset(this, 0, sizeof(*this));
-  }
-  void Insert(uintptr_t ptr, bool val) {
-    size_t idx  = ptr % kSize;
-    arr_[idx] = ptr;
-    if (val) {
-      bits_[idx / 32] |= 1U << (idx % 32);
-    } else {
-      bits_[idx / 32] &= ~(1U << (idx % 32));
-    }
-  }
-  bool Lookup(uintptr_t ptr, bool *val) {
-    size_t idx  = ptr % kSize;
-    if (arr_[idx] == ptr) {
-      *val = (bits_[idx / 32] >> (idx % 32)) & 1;
-      return true;
-    }
-    return false;
-  }
- private:
-  uintptr_t arr_[kSize];
-  uint32_t bits_[(kSize + 31) / 32];
-};
-
-// -------- IntPairToBoolCache ------ {{{1
-// Maps two integers to a boolean.
-// The second integer should be less than 1^31.
-template <int32_t kSize>
-class IntPairToBoolCache {
- public:
-  IntPairToBoolCache() {
-    Flush();
-  }
-  void Flush() {
-    memset(arr_, 0, sizeof(arr_));
-  }
-  void Insert(uint32_t a, uint32_t b, bool val) {
-    DCHECK((int32_t)b >= 0);
-    uint32_t i = idx(a, b);
-    if (val) {
-      b |= 1U << 31;
-    }
-    arr_[i * 2 + 0] = a;
-    arr_[i * 2 + 1] = b;
-  }
-  bool Lookup(uint32_t a, uint32_t b, bool *val) {
-    DCHECK((int32_t)b >= 0);
-    uint32_t i = idx(a, b);
-    if (arr_[i * 2] != a) return false;
-    uint32_t maybe_b = arr_[i * 2 + 1];
-    if (b == (maybe_b & (~(1U << 31)))) {
-      *val = (maybe_b & (1U << 31)) != 0;
-      return true;
-    }
-    return false;
-  }
- private:
-  uint32_t idx(uint32_t a, uint32_t b) {
-    return (a ^ ((b >> 16) | (b << 16))) % kSize;
-  }
-  uint32_t arr_[kSize * 2];
-};
-
-// end. {{{1
-#endif  // TS_SIMPLE_CACHE_
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_stats.h b/tsan/ts_stats.h
deleted file mode 100644
index 42475bb..0000000
--- a/tsan/ts_stats.h
+++ /dev/null
@@ -1,296 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-#ifndef TS_STATS_
-#define TS_STATS_
-
-#include "dynamic_annotations.h"
-#include "ts_util.h"
-
-// Statistic counters for each thread.
-// For stats accessed concurrently from different threads
-// we don't want to use global stats to avoid cache line ping-pong.
-struct ThreadLocalStats {
-  ThreadLocalStats() { Clear(); }
-  void Clear() {
-    memset(this, 0, sizeof(*this));
-  }
-  uintptr_t memory_access_sizes[18];
-  uintptr_t events[LAST_EVENT];
-  uintptr_t unlocked_access_ok;
-  uintptr_t n_fast_access1, n_fast_access2, n_fast_access4, n_fast_access8,
-            n_slow_access1, n_slow_access2, n_slow_access4, n_slow_access8,
-            n_very_slow_access, n_access_slow_iter;
-
-  uintptr_t mops_per_trace[16];
-  uintptr_t locks_per_trace[16];
-  uintptr_t locked_access[8];
-  uintptr_t history_uses_same_segment, history_creates_new_segment,
-            history_reuses_segment, history_uses_preallocated_segment;
-
-  uintptr_t msm_branch_count[16];
-
-  uintptr_t access_to_first_1g;
-  uintptr_t access_to_first_2g;
-  uintptr_t access_to_first_4g;
-};
-
-// Statistic counters for the entire tool, including aggregated
-// ThreadLocalStats (which are made private so that one can not
-// increment them using the global stats object).
-struct Stats : private ThreadLocalStats {
-  Stats() {
-    memset(this, 0, sizeof(*this));
-    ANNOTATE_BENIGN_RACE(&vts_clone, "Race on vts_clone");
-    ANNOTATE_BENIGN_RACE(&ignore_below_cache_miss,
-                         "Race on ignore_below_cache_miss");
-    ANNOTATE_BENIGN_RACE_SIZED(msm_branch_count, sizeof(msm_branch_count),
-                               "Race on msm_branch_count[]");
-  }
-
-  void Add(const ThreadLocalStats &s) {
-    uintptr_t *p1 = (uintptr_t*)this;
-    uintptr_t *p2 = (uintptr_t*)&s;
-    size_t n = sizeof(s) / sizeof(uintptr_t);
-    for (size_t i = 0; i < n; i++) {
-      p1[i] += p2[i];
-    }
-  }
-
-  void PrintStats() {
-    PrintEventStats();
-    Printf("   VTS: created small/big: %'ld / %'ld; "
-           "deleted small/big: %'ld / %'ld; cloned: %'ld\n",
-           vts_create_small, vts_create_big,
-           vts_delete_small, vts_delete_big, vts_clone);
-    Printf("   vts_total_create  = %'ld; avg=%'ld; delete = %'ld\n",
-           vts_total_create,
-           vts_total_create / (vts_create_small + vts_create_big + 1),
-           vts_total_delete);
-    Printf("   n_seg_hb        = %'ld\n", n_seg_hb);
-    Printf("   n_vts_hb        = %'ld\n", n_vts_hb);
-    Printf("   n_vts_hb_cached = %'ld\n", n_vts_hb_cached);
-    Printf("   memory access:\n"
-           "     1: %'ld / %'ld\n"
-           "     2: %'ld / %'ld\n"
-           "     4: %'ld / %'ld\n"
-           "     8: %'ld / %'ld\n"
-           "     s: %'ld\n",
-           n_fast_access1, n_slow_access1,
-           n_fast_access2, n_slow_access2,
-           n_fast_access4, n_slow_access4,
-           n_fast_access8, n_slow_access8,
-           n_very_slow_access);
-    PrintStatsForCache();
-//    Printf("   Mops:\n"
-//           "    total  = %'ld\n"
-//           "    unique = %'ld\n",
-//           mops_total, mops_uniq);
-    Printf("   Publish: set: %'ld; get: %'ld; clear: %'ld\n",
-           publish_set, publish_get, publish_clear);
-
-    Printf("   PcTo: all: %'ld\n", pc_to_strings);
-
-    Printf("   StackTrace: create: %'ld; delete %'ld\n",
-           stack_trace_create, stack_trace_delete);
-
-    Printf("   History segments: same: %'ld; reuse: %'ld; "
-           "preallocated: %'ld; new: %'ld\n",
-           history_uses_same_segment, history_reuses_segment,
-           history_uses_preallocated_segment, history_creates_new_segment);
-    Printf("   Forget all history: %'ld\n", n_forgets);
-
-    PrintStatsForSeg();
-    PrintStatsForSS();
-    PrintStatsForLS();
-  }
-
-  void PrintStatsForSS() {
-    Printf("   SegmentSet: created: %'ld; reused: %'ld;"
-           " find: %'ld; recycle: %'ld\n",
-           ss_create, ss_reuse, ss_find, ss_recycle);
-    Printf("        sizes: 2: %'ld; 3: %'ld; 4: %'ld; other: %'ld\n",
-           ss_size_2, ss_size_3, ss_size_4, ss_size_other);
-
-    // SSEq is called at least (ss_find + ss_recycle) times since
-    // FindExistingOrAlocateAndCopy calls map_.find()
-    // and RecycleOneSegmentSet calls map_.erase(it)
-    // Both find() and erase(it) require at least one call to SSHash and SSEq.
-    //
-    // Apart from SSHash call locations mentioned above,
-    // SSHash is called for each AllocateAndCopy (ss_create + ss_reuse) times
-    // for insert() AFTER it has already been called
-    // by FindExistingOrAlocateAndCopy in case find() returned map_.end().
-    // Hence the factor of 2.
-    uintptr_t sseq_estimated = ss_find + ss_recycle,
-            sshash_estimated = sseq_estimated + 2 * (ss_create + ss_reuse);
-    Printf("   SSHash called %12ld times (vs. %12ld = +%d%%)\n"
-           "   SSEq   called %12ld times (vs. %12ld = +%d%%)\n",
-            sshash_calls, sshash_estimated,
-            (sshash_calls - sshash_estimated)/(sshash_estimated/100 + 1),
-            sseq_calls,   sseq_estimated,
-            (sseq_calls   - sseq_estimated  )/(sseq_estimated/100 + 1));
-  }
-  void PrintStatsForCache() {
-    Printf("   Cache:\n"
-           "    new       = %'ld\n"
-           "    delete    = %'ld\n"
-           "    fetch     = %'ld\n"
-           "    storage   = %'ld\n",
-           cache_new_line,
-           cache_delete_empty_line, cache_fetch,
-           cache_max_storage_size);
-  }
-
-  void PrintStatsForSeg() {
-    Printf("   Segment: created: %'ld; reused: %'ld\n",
-           seg_create, seg_reuse);
-  }
-
-  void PrintStatsForLS() {
-    Printf("   LockSet add: 0: %'ld; 1 : %'ld; n : %'ld\n",
-           ls_add_to_empty, ls_add_to_singleton, ls_add_to_multi);
-    Printf("   LockSet rem: 1: %'ld; n : %'ld\n",
-           ls_remove_from_singleton, ls_remove_from_multi);
-    Printf("   LockSet cache: add : %'ld; rem : %'ld; fast: %'ld\n",
-           ls_add_cache_hit, ls_rem_cache_hit, ls_cache_fast);
-    Printf("   LockSet size: 2: %'ld 3: %'ld 4: %'ld 5: %'ld other: %'ld\n",
-           ls_size_2, ls_size_3, ls_size_4, ls_size_5, ls_size_other);
-  }
-
-  void PrintEventStats() {
-    uintptr_t total = 0;
-    for (int i = 0; i < LAST_EVENT; i++) {
-      if (events[i]) {
-        Printf("  %25s: %'ld\n", Event::TypeString((EventType)i),
-               events[i]);
-      }
-      total += events[i];
-    }
-    Printf("  %25s: %'ld\n", "Total", total);
-    for (size_t i = 0; i < TS_ARRAY_SIZE(memory_access_sizes); i++) {
-      if (memory_access_sizes[i]) {
-        Printf("  mop[%d]: %'ld\n", i, memory_access_sizes[i]);
-      }
-    }
-    for (size_t i = 0; i < TS_ARRAY_SIZE(mops_per_trace); i++) {
-      Printf("  mops_per_trace[%d] = %'ld\n", i, mops_per_trace[i]);
-    }
-    for (size_t i = 0; i < TS_ARRAY_SIZE(locks_per_trace); i++) {
-      Printf("  locks_per_trace[%d] = %'ld\n", i, locks_per_trace[i]);
-    }
-
-    uintptr_t total_locks = 0;
-    for (size_t i = 0; i < TS_ARRAY_SIZE(lock_sites); i++) {
-      if(lock_sites[i] == 0) continue;
-      Printf("lock_sites[%ld]=%ld\n", i, lock_sites[i]);
-      total_locks += lock_sites[i];
-    }
-    Printf("lock_sites[*]=%ld\n", total_locks);
-    Printf("futex_wait   =%ld\n", futex_wait);
-    Printf("unlocked_access_ok =%'ld\n", unlocked_access_ok);
-    uintptr_t all_locked_access = 0;
-    for (size_t i = 0; i < TS_ARRAY_SIZE(locked_access); i++) {
-      uintptr_t t = locked_access[i];
-      if (t) Printf("locked_access[%ld]   =%'ld\n", i, t);
-      all_locked_access += t;
-    }
-    Printf("locked_access[*]   =%'ld\n", all_locked_access);
-    Printf("try_acquire_line_spin =%ld\n", try_acquire_line_spin);
-    Printf("access to first 1/2/4 G: %'ld %'ld %'ld\n",
-           access_to_first_1g, access_to_first_2g, access_to_first_4g);
-
-
-    for (size_t i = 0; i < TS_ARRAY_SIZE(tleb_flush); i++) {
-      if(tleb_flush[i] == 0) continue;
-      Printf("tleb_flush[%ld]=%ld\n", i, tleb_flush[i]);
-    }
-    Printf("IgnoreBelowCache miss=%ld\n", ignore_below_cache_miss);
-    for (size_t i = 0; i < TS_ARRAY_SIZE(msm_branch_count); i++) {
-      if (msm_branch_count[i])
-        Printf("msm_branch_count[%02d] = %'ld\n", i, msm_branch_count[i]);
-    }
-    if (read_proc_self_stats)
-      Printf("read_proc_self_stats   =%ld\n", read_proc_self_stats);
-  }
-
-
-
-  uintptr_t n_vts_hb;
-  uintptr_t n_vts_hb_cached;
-  uintptr_t n_seg_hb;
-
-  uintptr_t ls_add_to_empty, ls_add_to_singleton, ls_add_to_multi,
-            ls_remove_from_singleton, ls_remove_from_multi,
-            ls_add_cache_hit, ls_rem_cache_hit,
-            ls_cache_fast,
-            ls_size_2, ls_size_3, ls_size_4, ls_size_5, ls_size_other;
-
-  uintptr_t cache_new_line;
-  uintptr_t cache_delete_empty_line;
-  uintptr_t cache_fetch;
-  uintptr_t cache_max_storage_size;
-
-  uintptr_t mops_total;
-  uintptr_t mops_uniq;
-
-  uintptr_t vts_create_big, vts_create_small,
-            vts_clone, vts_delete_small, vts_delete_big,
-            vts_total_delete, vts_total_create;
-
-  uintptr_t ss_create, ss_reuse, ss_find, ss_recycle;
-  uintptr_t ss_size_2, ss_size_3, ss_size_4, ss_size_other;
-
-  uintptr_t sshash_calls, sseq_calls;
-
-  uintptr_t seg_create, seg_reuse;
-
-  uintptr_t publish_set, publish_get, publish_clear;
-
-  uintptr_t pc_to_strings;
-
-  uintptr_t stack_trace_create, stack_trace_delete;
-
-  uintptr_t n_forgets;
-
-  uintptr_t lock_sites[20];
-
-  uintptr_t tleb_flush[10];
-
-  uintptr_t ignore_below_cache_miss;
-
-  uintptr_t try_acquire_line_spin;
-  uintptr_t futex_wait;
-  uintptr_t read_proc_self_stats;
-};
-
-
-// end. {{{1
-#endif  // TS_STATS_
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_trace_info.h b/tsan/ts_trace_info.h
deleted file mode 100644
index 3fe9478..0000000
--- a/tsan/ts_trace_info.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Information about one TRACE (single-entry-multiple-exit region of code).
-#ifndef TS_TRACE_INFO_
-#define TS_TRACE_INFO_
-
-#include "ts_util.h"
-// Information about one Memory Operation.
-//
-// A memory access is represented by mop[idx] = {pc,size,is_write}
-// which is computed at instrumentation time and {actual_address} computed
-// at run-time. The instrumentation insn looks like
-//  tleb[idx] = actual_address
-// The create_sblock field tells if we want to remember the stack trace
-// which corresponds to this Mop (i.e. create an SBLOCK).
-struct MopInfo {
- public:
-  MopInfo(uintptr_t pc, size_t size, bool is_write, bool create_sblock) {
-    DCHECK(sizeof(*this) == 8);
-    pc_ = pc;
-    if (size > 16) size = 16; // some instructions access more than 16 bytes.
-    size_minus1_ = size - 1;
-    is_write_ = is_write;
-    create_sblock_ = create_sblock;
-
-    DCHECK(size != 0);
-    DCHECK(this->size() == size);
-    DCHECK(this->is_write() == is_write);
-    DCHECK(this->create_sblock() == create_sblock);
-  }
-
-  MopInfo() {
-    DCHECK(sizeof(*this) == 8);
-    memset(this, 0, sizeof(*this));
-  }
-
-  uintptr_t pc()            { return pc_; };
-  size_t    size()          { return size_minus1_ + 1; }
-  bool      is_write()      { return is_write_; }
-  bool      create_sblock() { return create_sblock_; }
-
- private:
-  uint64_t  pc_           :58;  // 48 bits is enough for pc, even on x86-64.
-  uint64_t  create_sblock_ :1;
-  uint64_t  is_write_      :1;
-  uint64_t  size_minus1_   :4;  // 0..15
-};
-
-// ---------------- Lite Race ------------------
-// Experimental!
-//
-// The idea was first introduced in LiteRace:
-// http://www.cs.ucla.edu/~dlmarino/pubs/pldi09.pdf
-// Instead of analyzing all memory accesses, we do sampling.
-// For each trace (single-enry muliple-exit region) we maintain a counter of
-// executions. If a trace has been executed more than a certain threshold, we
-// start skipping this trace sometimes.
-// The LiteRace paper suggests several strategies for sampling, including
-// thread-local counters. Having thread local counters for all threads is too
-// expensive, so we have kLiteRaceNumTids arrays of counters and use
-// the array (tid % 8).
-//
-// sampling_rate indicates the level of sampling.
-// 0 means no sampling.
-// 1 means handle *almost* all accesses.
-// ...
-// 31 means very aggressive sampling (skip a lot of accesses).
-
-//
-// Note: ANNOTATE_PUBLISH_MEMORY() does not work with sampling... :(
-
-struct LiteRaceCounters {
-  uint32_t counter;
-  int32_t num_to_skip;
-};
-
-struct TraceInfoPOD {
-  enum { kLiteRaceNumTids = 8 };
-  enum { kLiteRaceStorageSize = 8 };
-  typedef LiteRaceCounters LiteRaceStorage[kLiteRaceNumTids][kLiteRaceStorageSize];
-
-  size_t n_mops_;
-  size_t pc_;
-  size_t counter_;
-  LiteRaceStorage *literace_storage;
-  int32_t storage_index;
-  MopInfo mops_[1];
-};
-
-// An instance of this class is created for each TRACE (SEME region)
-// during instrumentation.
-class TraceInfo : public TraceInfoPOD {
- public:
-  static TraceInfo *NewTraceInfo(size_t n_mops, uintptr_t pc);
-  void DeleteTraceInfo(TraceInfo *trace_info) {
-    delete [] (uintptr_t*)trace_info;
-  }
-  MopInfo *GetMop(size_t i) {
-    DCHECK(i < n_mops_);
-    return &mops_[i];
-  }
-
-  size_t n_mops() const { return n_mops_; }
-  size_t pc()     const { return pc_; }
-  size_t &counter()     { return counter_; }
-  MopInfo *mops()       { return mops_; }
-
-  static void PrintTraceProfile();
-
-  INLINE bool LiteRaceSkipTraceQuickCheck(uintptr_t tid_modulo_num) {
-    DCHECK(tid_modulo_num < kLiteRaceNumTids);
-    // Check how may accesses are left to skip. Racey, but ok.
-    LiteRaceCounters *counters =
-        &((*literace_storage)[tid_modulo_num][storage_index]);
-    int32_t num_to_skip = --counters->num_to_skip;
-    if (num_to_skip > 0) {
-      return true;
-    }
-    return false;
-  }
-
-  INLINE void LiteRaceUpdate(uintptr_t tid_modulo_num, uint32_t sampling_rate) {
-    DCHECK(sampling_rate < 32);
-    DCHECK(sampling_rate > 0);
-    LiteRaceCounters *counters =
-        &((*literace_storage)[tid_modulo_num][storage_index]);
-    uint32_t cur_counter = counters->counter;
-    // The bigger the counter the bigger the number of skipped accesses.
-    int32_t next_num_to_skip = (cur_counter >> (32 - sampling_rate)) + 1;
-    counters->num_to_skip = next_num_to_skip;
-    counters->counter = cur_counter + next_num_to_skip;
-
-  }
-
-  // TODO(glider): get rid of this.
-  INLINE void LLVMLiteRaceUpdate(uintptr_t tid_modulo_num,
-                                 uint32_t sampling_rate) {
-    LiteRaceUpdate(tid_modulo_num, sampling_rate);
-  }
-
-  // This is all racey, but ok.
-  INLINE bool LiteRaceSkipTrace(uint32_t tid_modulo_num,
-                                uint32_t sampling_rate) {
-    if (LiteRaceSkipTraceQuickCheck(tid_modulo_num)) return true;
-    LiteRaceUpdate(tid_modulo_num, sampling_rate);
-    return false;
-  }
-
-  INLINE bool LiteRaceSkipTraceRealTid(uint32_t tid, uint32_t sampling_rate) {
-    return LiteRaceSkipTrace(tid % kLiteRaceNumTids, sampling_rate);
-  }
-
- private:
-  static size_t id_counter_;
-  static vector<TraceInfo*> *g_all_traces;
-
-  TraceInfo() : TraceInfoPOD() { }
-};
-
-// end. {{{1
-#endif  // TS_TRACE_INFO_
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_util.cc b/tsan/ts_util.cc
deleted file mode 100644
index 12fd3f2..0000000
--- a/tsan/ts_util.cc
+++ /dev/null
@@ -1,835 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-//
-// See ts_util.h for mode details.
-
-#include "common_util.h"
-#include "thread_sanitizer.h"
-#include "ts_stats.h"
-#include "ts_lock.h"
-#include <stdarg.h>
-
-FLAGS *G_flags = NULL;
-
-#if defined(_MSC_VER)
-
-#pragma comment(lib, "winmm.lib")
-
-# ifdef TS_PIN
-#  include "pin.H"
-# endif
-namespace WINDOWS
-{
-// This is the way of including winows.h recommended by PIN docs.
-#include<Windows.h>
-}
-int getpid() { return WINDOWS::GetCurrentProcessId(); }
-#endif
-
-#if defined(TS_VALGRIND)
-size_t TimeInMilliSeconds() {
-  return VG_(read_millisecond_timer)();
-}
-#else
-// TODO(kcc): implement this.
-size_t TimeInMilliSeconds() {
-#ifdef __GNUC__
-  return time(0) * 1000;
-#else
-  return WINDOWS::timeGetTime();
-#endif
-}
-#endif
-
-Stats *G_stats;
-
-#ifndef TS_LLVM
-bool GetNameAndOffsetOfGlobalObject(uintptr_t addr,
-                                    string *name, uintptr_t *offset) {
-# ifdef TS_VALGRIND
-    const int kBufLen = 1023;
-    char buff[kBufLen+1];
-    PtrdiffT off;
-    if (VG_(get_datasym_and_offset)(addr, reinterpret_cast<Char*>(buff),
-                                    kBufLen, &off)) {
-      *name = buff;
-      *offset = off;
-      return true;
-    }
-    return false;
-# else
-  return false;
-# endif  // TS_VALGRIND
-}
-#endif  // TS_LLVM
-
-
-#ifndef TS_VALGRIND
-void GetThreadStack(int tid, uintptr_t *min_addr, uintptr_t *max_addr) {
-  *min_addr = 0xfffa;
-  *max_addr = 0xfffb;
-}
-#endif
-
-static int n_errs_found;
-
-void SetNumberOfFoundErrors(int n_errs) {
-  n_errs_found = n_errs;
-}
-
-int GetNumberOfFoundErrors() {
-  return n_errs_found;
-}
-
-
-#if !defined(TS_VALGRIND) && !defined(TS_LLVM)
-FILE *G_out = stderr;
-#endif
-
-#ifdef TS_LLVM
-FILE *G_out;
-#endif
-
-static string RemoveUnsupportedFormat(const char *str) {
-#ifdef _MSC_VER
-  // replace "%'" with "%"
-  string res;
-  size_t n = strlen(str);
-  if (n == 0) {
-    return "";
-  }
-  res.reserve(n);
-  res.push_back(str[0]);
-  for (size_t i = 1; i < n; i++) {
-    if (str[i] == '\'' && *res.rbegin() == '%') continue;
-    res.push_back(str[i]);
-  }
-  return res;
-#else
-  return str;
-#endif
-}
-
-void Printf(const char *format, ...) {
-#ifdef TS_VALGRIND
-  va_list args;
-  va_start(args, format);
-  VG_(vprintf)(format, args);
-  va_end(args);
-#else
-  va_list args;
-  va_start(args, format);
-  vfprintf(G_out, RemoveUnsupportedFormat(format).c_str(), args);
-  fflush(G_out);
-  va_end(args);
-#endif
-}
-
-// Like Print(), but prepend each line with ==XXXXX==,
-// where XXXXX is the pid.
-void Report(const char *format, ...) {
-  int buff_size = 1024*16;
-  char *buff = new char[buff_size];
-  CHECK(buff);
-  DCHECK(G_flags);
-
-  va_list args;
-
-  while (1) {
-    va_start(args, format);
-    int ret = vsnprintf(buff, buff_size,
-                        RemoveUnsupportedFormat(format).c_str(), args);
-    va_end(args);
-    if (ret < buff_size) break;
-    delete [] buff;
-    buff_size *= 2;
-    buff = new char[buff_size];
-    CHECK(buff);
-    // Printf("Resized buff: %d\n", buff_size);
-  }
-
-  char pid_buff[100];
-  snprintf(pid_buff, sizeof(pid_buff), "==%d== ", getpid());
-
-  string res;
-#ifndef TS_LLVM
-  int len = strlen(buff);
-#else
-  int len = __real_strlen(buff);
-#endif
-  bool last_was_new_line = true;
-  for (int i = 0; i < len; i++) {
-    if (G_flags->show_pid && last_was_new_line)
-      res += pid_buff;
-    last_was_new_line = (buff[i] == '\n');
-    res += buff[i];
-  }
-
-  delete [] buff;
-
-  Printf("%s", res.c_str());
-}
-
-long my_strtol(const char *str, char **end, int base) {
-#ifdef TS_VALGRIND
-  if (base == 16 || (base == 0 && str && str[0] == '0' && str[1] == 'x')) {
-    return VG_(strtoll16)((Char*)str, (Char**)end);
-  }
-  return VG_(strtoll10)((Char*)str, (Char**)end);
-#else
-  return strtoll(str, end, base);
-#endif
-}
-
-// Not thread-safe. Need to make it thread-local if we allow
-// malloc to be called concurrently.
-MallocCostCenterStack g_malloc_stack;
-
-size_t GetVmSizeInMb() {
-#ifdef VGO_linux
-  const char *path ="/proc/self/statm";  // see 'man proc'
-  uintptr_t counter = G_stats->read_proc_self_stats++;
-  if (counter >= 1024 && ((counter & (counter - 1)) == 0))
-    Report("INFO: reading %s for %ld'th time\n", path, counter);
-  int  fd = OpenFileReadOnly(path, false);
-  if (fd < 0) return 0;
-  char buff[128];
-  int n_read = read(fd, buff, sizeof(buff) - 1);
-  buff[n_read] = 0;
-  close(fd);
-  char *end;
-  size_t vm_size_in_pages = my_strtol(buff, &end, 10);
-  return vm_size_in_pages >> 8;
-#else
-  return 0;
-#endif
-}
-
-static string StripTemplatesFromFunctionName(const string &fname) {
-  // Returns "" in case of error.
-
-  string ret;
-  size_t read_pointer = 0, braces_depth = 0;
-
-  while (read_pointer < fname.size()) {
-    size_t next_brace = fname.find_first_of("<>", read_pointer);
-    if (next_brace == fname.npos) {
-      if (braces_depth > 0) {
-        // This can happen on Visual Studio if we reach the ~2000 char limit.
-        CHECK(fname.size() > 256);
-        return "";
-      }
-      ret += (fname.c_str() + read_pointer);
-      break;
-    }
-
-    if (braces_depth == 0) {
-      ret.append(fname, read_pointer, next_brace - read_pointer);
-    }
-
-    if (next_brace > 0) {
-      // We could have found one of the following operators.
-      const char *OP[] = {">>=", "<<=",
-                          ">>", "<<",
-                          ">=", "<=",
-                          "->", "->*",
-                          "<", ">"};
-
-      bool operator_name = false;
-      for (size_t i = 0; i < TS_ARRAY_SIZE(OP); i++) {
-        size_t op_offset = ((string)OP[i]).find(fname[next_brace]);
-        if (op_offset == string::npos)
-          continue;
-        if (next_brace >= 8 + op_offset &&  // 8 == strlen("operator");
-            "operator" == fname.substr(next_brace - (8 + op_offset), 8) &&
-            OP[i] == fname.substr(next_brace - op_offset, strlen(OP[i]))) {
-          operator_name = true;
-          ret += OP[i] + op_offset;
-          next_brace += strlen(OP[i] + op_offset);
-          read_pointer = next_brace;
-          break;
-        }
-      }
-
-      if (operator_name)
-        continue;
-    }
-
-    if (fname[next_brace] == '<') {
-      braces_depth++;
-      read_pointer = next_brace + 1;
-    } else if (fname[next_brace] == '>') {
-      if (braces_depth == 0) {
-        // Going to `braces_depth == -1` IS possible at least for this function on Windows:
-        // "std::operator<<char,std::char_traits<char>,std::allocator<char> >".
-        // Oh, well... Return an empty string and let the caller decide.
-        return "";
-      }
-      braces_depth--;
-      read_pointer = next_brace + 1;
-    } else
-      CHECK(0);
-  }
-  if (braces_depth != 0) {
-    CHECK(fname.size() > 256);
-    return "";
-  }
-  return ret;
-}
-
-static string StripParametersFromFunctionName(const string &demangled) {
-  // Returns "" in case of error.
-
-  string fname = demangled;
-
-  // Strip stuff like "(***)" and "(anonymous namespace)" -> they are tricky.
-  size_t found = fname.npos;
-  while ((found = fname.find(", ")) != fname.npos)
-    fname.erase(found+1, 1);
-  while ((found = fname.find("(**")) != fname.npos)
-    fname.erase(found+2, 1);
-  while ((found = fname.find("(*)")) != fname.npos)
-    fname.erase(found, 3);
-  while ((found = fname.find("const()")) != fname.npos)
-    fname.erase(found+5, 2);
-  while ((found = fname.find("const volatile")) != fname.npos &&
-         found > 1 && found + 14 == fname.size())
-    fname.erase(found-1);
-  while ((found = fname.find("(anonymous namespace)")) != fname.npos)
-    fname.erase(found, 21);
-
-  if (fname.find_first_of("(") == fname.npos)
-    return fname;
-  DCHECK(count(fname.begin(), fname.end(), '(') ==
-         count(fname.begin(), fname.end(), ')'));
-
-  string ret;
-  bool returns_fun_ptr = false;
-  size_t braces_depth = 0, read_pointer = 0;
-
-  size_t first_parenthesis = fname.find("(");
-  if (first_parenthesis != fname.npos) {
-    DCHECK(fname.find_first_of(")") != fname.npos);
-    DCHECK(fname.find_first_of(")") > first_parenthesis);
-    DCHECK(fname[first_parenthesis] == '(');
-    if (first_parenthesis + 2 < fname.size() &&
-        fname[first_parenthesis - 1] == ' ' &&
-        fname[first_parenthesis + 1] == '*' &&
-        fname[first_parenthesis + 2] != ' ') {
-      // Return value type is a function pointer
-      read_pointer = first_parenthesis + 2;
-      while (fname[read_pointer] == '*' || fname[read_pointer] == '&')
-        read_pointer++;
-      braces_depth = 1;
-      returns_fun_ptr = true;
-    }
-  }
-
-  while (read_pointer < fname.size()) {
-    size_t next_brace = fname.find_first_of("()", read_pointer);
-    if (next_brace == fname.npos) {
-      if (braces_depth != 0) {
-        // Overflow?
-        return "";
-      }
-      size_t _const = fname.find(" const", read_pointer);
-      if (_const == fname.npos) {
-        ret += (fname.c_str() + read_pointer);
-      } else {
-        CHECK(_const + 6 == fname.size());
-        ret.append(fname, read_pointer, _const - read_pointer);
-      }
-      break;
-    }
-
-    if (braces_depth == (returns_fun_ptr ? 1 : 0)) {
-      ret.append(fname, read_pointer, next_brace - read_pointer);
-      returns_fun_ptr = false;
-    }
-
-    if (fname[next_brace] == '(') {
-      if (next_brace >= 8 && fname[next_brace+1] == ')' &&
-          "operator" == fname.substr(next_brace - 8, 8)) {
-        ret += "()";
-        read_pointer = next_brace + 2;
-      } else {
-        braces_depth++;
-        read_pointer = next_brace + 1;
-      }
-    } else if (fname[next_brace] == ')') {
-      CHECK(braces_depth > 0);
-      braces_depth--;
-      read_pointer = next_brace + 1;
-    } else
-      CHECK(0);
-  }
-  if (braces_depth != 0)
-    return "";
-
-  // Special case: on Linux, Valgrind prepends the return type for template
-  // functions. And on Windows we may see `scalar deleting destructor'.
-  // And we may see "operaror new" etc.
-  // And some STL code inserts const& between the return type and the function
-  // name.
-  // Oh, well...
-  size_t space_or_tick;
-  while (ret != "") {
-    space_or_tick = ret.find_first_of("` ");
-    if (space_or_tick != ret.npos && ret[space_or_tick] == ' ' &&
-        ret.substr(0, space_or_tick).find("operator") == string::npos) {
-      ret = ret.substr(space_or_tick + 1);
-    } else if (space_or_tick != ret.npos && space_or_tick + 1 == ret.size()) {
-      ret = ret.substr(0, space_or_tick);
-    } else {
-      break;
-    }
-  }
-  return ret;
-}
-
-string NormalizeFunctionName(const string &demangled) {
-  if (demangled[1] == '[' && strchr("+-=", demangled[0]) != NULL) {
-    // Objective-C function
-    return demangled;
-  }
-
-  if (demangled.find_first_of("<>()") == demangled.npos) {
-    // C function or a well-formatted function name.
-    return demangled;
-  }
-
-  if (demangled == "(below main)" || demangled == "(no symbols)")
-    return demangled;
-
-  const char* const MALFORMED = "(malformed frame)";
-
-  string fname = StripTemplatesFromFunctionName(demangled);
-  if (fname.size() == 0) {
-    if (DEBUG_MODE)
-      Printf("PANIC: `%s`\n", demangled.c_str());
-    return MALFORMED;
-  }
-
-  fname = StripParametersFromFunctionName(fname);
-  if (fname.size() == 0) {
-    CHECK(demangled.size() >= 256);
-    if (DEBUG_MODE)
-      Printf("PANIC: `%s`\n", demangled.c_str());
-    return MALFORMED;
-  }
-
-  return fname;
-}
-
-void OpenFileWriteStringAndClose(const string &file_name, const string &str) {
-#ifdef TS_VALGRIND
-  SysRes sres = VG_(open)((const Char*)file_name.c_str(),
-                          VKI_O_WRONLY|VKI_O_CREAT|VKI_O_TRUNC,
-                          VKI_S_IRUSR|VKI_S_IWUSR);
-  if (sr_isError(sres)) {
-    Report("WARNING: can not open file %s\n", file_name.c_str());
-    exit(1);
-  }
-  int fd = sr_Res(sres);
-  write(fd, str.c_str(), str.size());
-  close(fd);
-#else
-  CHECK(0);
-#endif
-}
-
-//--------- Sockets ------------------ {{{1
-#if defined(TS_PIN) && defined(__GNUC__)
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netdb.h>
-FILE *OpenSocketForWriting(const string &host_and_port) {
-  size_t col = host_and_port.find(":");
-  if (col == string::npos) return NULL;
-  string host = host_and_port.substr(0, col);
-  string port_str = host_and_port.substr(col + 1);
-  int sockfd;
-  struct sockaddr_in serv_addr;
-  struct hostent *server;
-  sockfd = socket(AF_INET, SOCK_STREAM, 0);
-  if (sockfd < 0) return NULL;
-  server = gethostbyname(host.c_str());
-  if (server == 0) return NULL;
-  memset(&serv_addr, 0, sizeof(serv_addr));
-  serv_addr.sin_family = AF_INET;
-  memcpy((char *)&serv_addr.sin_addr.s_addr,
-         (char *)server->h_addr,
-         server->h_length);
-  serv_addr.sin_port = htons(atoi(port_str.c_str()));
-  if (connect(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0)
-    return NULL;
-  return fdopen(sockfd, "w");
-}
-#else
-FILE *OpenSocketForWriting(const string &host_and_port) {
-  return NULL;  // unimplemented.
-}
-#endif
-//--------- TSLock ------------------ {{{1
-#ifdef _MSC_VER
-//# define TS_LOCK_PIPE
-# define TS_LOCK_PIN
-#else
-# define TS_LOCK_FUTEX
-#endif
-
-#if defined(TS_LOCK_PIPE) && defined(TS_PIN)
-#ifdef __GNUC__
-#include <unistd.h>
-// Lock based on pipe's send/receive. The idea (but not the code) 
-// is shamelessly stolen from valgrind's /coregrind/m_scheduler/sema.c
-struct TSLock::Rep {
-  bool held;
-  char pipe_char;
-  int pipe_fd[2];
-
-  void Write() {
-    char buf[2];
-    buf[0] = pipe_char;
-    buf[1] = 0;
-    int res = write(pipe_fd[1], buf, 1);
-    CHECK(res == 1);
-  }
-  bool Read() {
-    char buf[2];
-    buf[0] = 0;
-    buf[1] = 0;
-    int res = read(pipe_fd[0], buf, 1);
-    if (res != 1)
-      return false;
-    //Printf("rep::Read: %c\n", buf[0]);
-
-    pipe_char++;
-    if (pipe_char == 'Z' + 1) pipe_char = 'A';
-    return true;
-  }
-  void Open() {
-    CHECK(0 == pipe(pipe_fd));
-    CHECK(pipe_fd[0] != pipe_fd[1]);
-    pipe_char = 'A';
-  }
-  void Close() {
-    close(pipe_fd[0]);
-    close(pipe_fd[1]);
-  }
-};
-#elif defined(_MSC_VER)
-struct TSLock::Rep {
-  bool held;
-  char pipe_char;
-  WINDOWS::HANDLE pipe_fd[2];
-  void Write() {
-    char buf[2];
-    buf[0] = pipe_char;
-    buf[1] = 0;
-    WINDOWS::DWORD n_written = 0;
-    int res = WINDOWS::WriteFile(pipe_fd[1], buf, 1, &n_written, NULL);
-    CHECK(res != 0 && n_written == 1);
-  }
-  bool Read() {
-    char buf[2];
-    buf[0] = 0;
-    buf[1] = 0;
-    WINDOWS::DWORD n_read  = 0;
-    int res = WINDOWS::ReadFile(pipe_fd[0], buf, 1, &n_read, NULL);
-    if (res == 0 && n_read == 0)
-      return false;
-    //Printf("rep::Read: %c\n", buf[0]);
-
-    pipe_char++;
-    if (pipe_char == 'Z' + 1) pipe_char = 'A';
-    return true;
-  }
-  void Open() {
-    CHECK(WINDOWS::CreatePipe(&pipe_fd[0], &pipe_fd[1], NULL, 0));
-    CHECK(pipe_fd[0] != pipe_fd[1]);
-    pipe_char = 'A';
-  }
-  void Close() {
-    WINDOWS::CloseHandle(pipe_fd[0]);
-    WINDOWS::CloseHandle(pipe_fd[1]);
-  }
-};
-#endif
-
-TSLock::TSLock() {
-  rep_ = new Rep;
-  rep_->held = false;
-  rep_->Open();
-  rep_->Write();
-}
-TSLock::~TSLock() {
-  rep_->Close();
-}
-void TSLock::Lock() {
-  while(rep_->Read() == false)
-    ;
-  rep_->held = true;
-}
-void TSLock::Unlock() {
-  rep_->held = false;
-  rep_->Write();
-}
-void TSLock::AssertHeld() {
-  DCHECK(rep_->held);
-}
-#endif  // __GNUC__ & TS_LOCK_PIPE
-
-#if defined(TS_LOCK_PIN) && defined(TS_PIN)
-#include "pin.H"
-struct TSLock::Rep {
-  PIN_LOCK lock;
-  bool held;
-};
-
-TSLock::TSLock() {
-  rep_ = new Rep();
-  rep_->held = false;
-  InitLock(&rep_->lock);
-}
-TSLock::~TSLock() {
-  delete rep_;
-}
-void TSLock::Lock() {
-  GetLock(&rep_->lock, __LINE__);
-  rep_->held = true;
-}
-void TSLock::Unlock() {
-  rep_->held = false;
-  ReleaseLock(&rep_->lock);
-}
-void TSLock::AssertHeld() {
-  DCHECK(rep_->held);
-}
-#endif  // TS_LOCK_PIN
-
-#if defined(TS_WRAP_PTHREAD_LOCK)
-#include "tsan_rtl_wrap.h"
-
-struct TSLock::Rep {
-  pthread_mutex_t lock;
-  bool held;
-};
-TSLock::TSLock() {
-  rep_ = new Rep();
-  rep_->held = false;
-  __real_pthread_mutex_init(&rep_->lock, NULL);
-}
-TSLock::~TSLock() {
-  __real_pthread_mutex_destroy(&rep_->lock);
-  delete rep_;
-}
-void TSLock::Lock() {
-  __real_pthread_mutex_lock(&rep_->lock);
-  rep_->held = true;
-}
-void TSLock::Unlock() {
-  rep_->held = false;
-  __real_pthread_mutex_unlock(&rep_->lock);
-}
-void TSLock::AssertHeld() {
-  DCHECK(rep_->held);
-}
-#endif  // TS_LLVM
-
-#if defined(TS_LOCK_FUTEX) && defined(__GNUC__) && \
- (defined (TS_PIN) || defined (TS_LLVM))
-#include <linux/futex.h>
-#include <sys/time.h>
-#include <syscall.h>
-
-// Simple futex-based lock.
-// The idea is taken from "Futexes Are Tricky" by Ulrich Drepper
-
-TSLock::TSLock() {
-  rep_ = 0;
-  ANNOTATE_BENIGN_RACE(&rep_, "Benign race on TSLock::rep_");
-  ANNOTATE_RWLOCK_CREATE(this);
-}
-TSLock::~TSLock() {
-  ANNOTATE_RWLOCK_DESTROY(this);
-  DCHECK(rep_ == 0);
-}
-void TSLock::Lock() {
-  int *p = (int*)&rep_;
-  const int kSpinCount = 100;
-  DCHECK(kSpinCount > 0);
-  int c;
-  for (int i = 0; i < kSpinCount; i++) {
-    c = __sync_val_compare_and_swap(p, 0, 1);
-    if (c == 0) break;
-  }
-  if (c == 0) {
-    // The mutex was unlocked. Now it's ours. Done.
-    ANNOTATE_RWLOCK_ACQUIRED(this, /*is_w*/true);
-    return;
-  }
-  DCHECK(c == 1 || c == 2);
-  // We are going to block on this lock. Make sure others know that.
-  if (c != 2) {
-    c = __sync_lock_test_and_set(p, 2);
-  }
-  // Block.
-  int n_waits = 0;
-  while (c != 0) {
-    syscall(SYS_futex, p, FUTEX_WAIT, 2, 0, 0, 0);
-    n_waits++;
-    c = __sync_lock_test_and_set(p, 2);
-  }
-  ANNOTATE_RWLOCK_ACQUIRED(this, /*is_w*/true);
-  G_stats->futex_wait += n_waits;
-}
-void TSLock::Unlock() {
-  ANNOTATE_RWLOCK_RELEASED(this, /*is_w*/true);
-  int *p = (int*)&rep_;
-  DCHECK(*p == 1 || *p == 2);
-  int c = __sync_sub_and_fetch(p, 1);
-  DCHECK(c == 0 || c == 1);
-  if (c == 1) {
-    *p = 0;
-    syscall(SYS_futex, p, FUTEX_WAKE, 1, 0, 0, 0);
-  }
-}
-void TSLock::AssertHeld() {
-  DCHECK(rep_);
-}
-#endif
-
-// Same as above to compile Go's rtl
-// No annotations in this version: it should be simple as possible.
-#if defined(TS_LOCK_FUTEX) && defined(__GNUC__) && \
-  (defined (TS_GO))
-#include <linux/futex.h> // TODO(mpimenov): portability?
-#include <sys/time.h>
-#include <syscall.h>
-
-// Simple futex-based lock.
-// The idea is taken from "Futexes Are Tricky" by Ulrich Drepper
-
-TSLock::TSLock() {
-  rep_ = 0;
-}
-TSLock::~TSLock() {
-  DCHECK(rep_ == 0);
-}
-void TSLock::Lock() {
-  int *p = (int*)&rep_;
-  const int kSpinCount = 100;
-  DCHECK(kSpinCount > 0);
-  int c;
-  for (int i = 0; i < kSpinCount; i++) {
-    c = __sync_val_compare_and_swap(p, 0, 1);
-    if (c == 0) break;
-  }
-  if (c == 0) {
-    // The mutex was unlocked. Now it's ours. Done.
-    return;
-  }
-  DCHECK(c == 1 || c == 2);
-  // We are going to block on this lock. Make sure others know that.
-  if (c != 2) {
-    c = __sync_lock_test_and_set(p, 2);
-  }
-  // Block.
-  int n_waits = 0;
-  while (c != 0) {
-    syscall(SYS_futex, p, FUTEX_WAIT, 2, 0, 0, 0);
-    n_waits++;
-    c = __sync_lock_test_and_set(p, 2);
-  }
-  G_stats->futex_wait += n_waits;
-}
-void TSLock::Unlock() {
-  int *p = (int*)&rep_;
-  DCHECK(*p == 1 || *p == 2);
-  int c = __sync_sub_and_fetch(p, 1);
-  DCHECK(c == 0 || c == 1);
-  if (c == 1) {
-    *p = 0;
-    syscall(SYS_futex, p, FUTEX_WAKE, 1, 0, 0, 0);
-  }
-}
-void TSLock::AssertHeld() {
-  DCHECK(rep_);
-}
-#endif // (TS_LOCK_FUTEX) (__GNUC__) && (TS_GO)
-
-//--------------- Atomics ----------------- {{{1
-#if defined (_MSC_VER) && TS_SERIALIZED == 0
-uintptr_t AtomicExchange(uintptr_t *ptr, uintptr_t new_value) {
-  return _InterlockedExchange((volatile WINDOWS::LONG*)ptr, new_value);
-}
-
-void ReleaseStore(uintptr_t *ptr, uintptr_t value) {
-  *(volatile uintptr_t*)ptr = value;
-  // TODO(kcc): anything to add here?
-}
-
-int32_t NoBarrier_AtomicIncrement(int32_t* ptr) {
-  return _InterlockedIncrement((volatile WINDOWS::LONG *)ptr);
-}
-
-int32_t NoBarrier_AtomicDecrement(int32_t* ptr) {
-  return _InterlockedDecrement((volatile WINDOWS::LONG *)ptr);
-}
-#endif  // _MSC_VER && TS_SERIALIZED
-//--------------- YIELD ----------------- {{{1
-#if defined (_MSC_VER)
-#include <intrin.h>
-void YIELD() {
-  WINDOWS::Sleep(0);
-}
-void PROCESSOR_YIELD() {
-  _mm_pause();
-}
-#elif defined(TS_VALGRIND)
-void YIELD() {
-}
-void PROCESSOR_YIELD() {
-}
-#elif defined(__GNUC__)
-void YIELD() {
-  sched_yield();
-}
-void PROCESSOR_YIELD() {
-  __asm__ __volatile__ ("pause");
-}
-#else
-#error "Unknown config"
-#endif
-
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_util.h b/tsan/ts_util.h
deleted file mode 100644
index 43b2dd1..0000000
--- a/tsan/ts_util.h
+++ /dev/null
@@ -1,431 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Author: Timur Iskhodzhanov.
-
-// This file contains utility classes and functions used by ThreadSanitizer.
-// TODO(kcc): move more utilities from thread_sanitizer.cc to this file.
-
-#ifndef TS_UTIL_H_
-#define TS_UTIL_H_
-
-//--------- Head ------------------- {{{1
-#if defined(TS_VALGRIND)
-# define CHECK tl_assert
-#elif defined(TS_PIN)
-extern void Printf(const char *format, ...);
-extern void ThreadSanitizerDumpAllStacks();
-# define CHECK(x) do { if (!(x)) { \
-   Printf("Assertion failed: %s (%s:%d) %s\n", \
-          __FUNCTION__, __FILE__, __LINE__, #x); \
-   ThreadSanitizerDumpAllStacks(); \
-   exit(1); }} while ((void)0, 0)
-#elif defined(TS_OFFLINE)
-extern unsigned long offline_line_n;
-# define CHECK(x) do { if (!(x)) { \
-    Printf("ASSERT on line %ld\n", offline_line_n); \
-     assert(x);}} while ((void)0, 0)
-#else
-# define CHECK assert
-#endif
-
-// support for stlport in stlp_std:: namespace (or other custom ns)
-#ifdef TS_STL_NS
-# define STD TS_STL_NS 
-#else
-# define STD std
-#endif
-
-#if defined(TS_VALGRIND)
-# include "ts_valgrind.h"
-# define TS_USE_STLPORT
-#if defined(VGP_arm_linux)
-// This macro is explicitly undefined in glibc for ARM.
-#define _GLIBCXX_USE_C99 1
-#endif  // ARM
-
-// __WORDSIZE is GLibC-specific. Get it from Valgrind if needed.
-#if !defined(__WORDSIZE)
-#if VG_WORDSIZE == 4
-#define __WORDSIZE 32
-#elif VG_WORDSIZE == 8
-#define __WORDSIZE 64
-#endif // VG_WORDSIZE
-#endif // TS_VALGRIND && !__WORDSIZE
-
-#elif defined(TS_LLVM)
-#  define TS_USE_STLPORT
-# include <assert.h>
-# include <fcntl.h>
-# include <time.h>
-
-#elif defined(__GNUC__)
-# undef NDEBUG  // Assert is always on.
-# include <assert.h>
-# include <sys/types.h>
-# include <sys/stat.h>
-# include <fcntl.h>
-# define TS_USE_GNUC_STL
-
-#elif defined(_MSC_VER)
-# undef NDEBUG  // Assert is always on.
-# include <assert.h>
-# include <stdio.h>
-# include <intrin.h>
-# define TS_USE_WIN_STL
-
-#else
-# error "Unknown configuration"
-#endif
-
-//--------- STL ------------------- {{{1
-#if defined(TS_USE_GNUC_STL)  // ----------- g++ STL -----------
-#include <string.h>
-#include <limits.h>
-#include <set>
-#include <map>
-#include <vector>
-#include <deque>
-#include <stack>
-#include <algorithm>
-#include <string>
-#include <bitset>
-#include <new>
-#include <ext/algorithm>
-
-#ifdef __APPLE__
-// Apple's unordered_map in gcc 4.0 does not support -fno-exceptions.
-#include "ext/hash_map"
-#include "ext/hash_set"
-#define unordered_map __gnu_cxx::hash_map
-#define unordered_set __gnu_cxx::hash_set
-#else
-#include "tr1/unordered_map"
-#include "tr1/unordered_set"
-using STD::tr1::unordered_map;
-using STD::tr1::unordered_set;
-#endif
-
-#elif defined(TS_USE_STLPORT)  // ------------- STLport ----------
-#include "set"
-#include "map"
-#include "hash_map"
-#include "hash_set"
-#include "vector"
-#include "deque"
-#include "stack"
-#include "algorithm"
-#include "string"
-#include "bitset"
-#include "algorithm"
-#include "new"
-
-#include "unordered_map"
-#include "unordered_set"
-using STD::tr1::unordered_map;
-using STD::tr1::unordered_set;
-
-#elif defined(TS_USE_WIN_STL)  // ------------- MSVC STL ---------
-#include <string.h>
-#include <limits.h>
-#include <set>
-#include <map>
-#include <vector>
-#include <deque>
-#include <stack>
-#include <algorithm>
-#include <string>
-#include <bitset>
-#include <new>
-
-// No such thing in VC 2005
-//#include <unordered_map>
-//#include <unordered_set>
-//using std::tr1::unordered_map;
-//using std::tr1::unordered_set;
-#include <hash_map>
-#include <hash_set>
-#define unordered_map stdext::hash_map
-#define unordered_set stdext::hash_set
-
-#else
-# error "Unknown STL"
-#endif  // TS_USE_STANDARD_STL
-
-using STD::string;
-using STD::set;
-using STD::multiset;
-using STD::multimap;
-using STD::map;
-using STD::deque;
-using STD::stack;
-using STD::vector;
-using STD::bitset;
-using STD::nothrow_t;
-using STD::nothrow;
-
-using STD::min;
-using STD::max;
-using STD::sort;
-using STD::pair;
-using STD::make_pair;
-using STD::unique_copy;
-using STD::count;
-using STD::set_intersection;
-using STD::lower_bound;
-using STD::copy;
-using STD::binary_search;
-
-#ifdef TS_LLVM
-# include "tsan_rtl_wrap.h"
-#endif
-
-//--------- defines ------------------- {{{1
-#ifdef TS_VALGRIND
-// TODO(kcc) get rid of these macros.
-#define sprintf(arg1, arg2...) VG_(sprintf)((Char*)arg1, (HChar*)arg2)
-#define vsnprintf(a1, a2, a3, a4) VG_(vsnprintf)((Char*)a1, a2, a3, a4)
-#define getpid VG_(getpid)
-#define strchr(a,b)    VG_(strchr)((Char*)a,b)
-#define strdup(a) (char*)VG_(strdup)((HChar*)"strdup", (const Char*)a)
-#define snprintf(a,b,c...)     VG_(snprintf)((Char*)a,b,c)
-#define read VG_(read)
-#define getenv(x) VG_(getenv)((Char*)x)
-#define close VG_(close)
-#define write VG_(write)
-#define usleep(a) /*nothing. TODO.*/
-
-#elif defined(__GNUC__)
-#include <unistd.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define UNLIKELY(x) __builtin_expect((x), 0)
-#define LIKELY(x)   __builtin_expect(!!(x), 1)
-
-#elif defined(_MSC_VER)
-typedef __int8 int8_t;
-typedef __int16 int16_t;
-typedef __int32 int32_t;
-typedef __int64 int64_t;
-typedef unsigned __int8 uint8_t;
-typedef unsigned __int16 uint16_t;
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-
-typedef int pthread_t;
-int getpid();
-#define snprintf _snprintf
-#define strtoll strtol  // TODO(kcc): _MSC_VER hmm...
-#define UNLIKELY(x) (x)  // TODO(kcc): how to say this in MSVC?
-#define LIKELY(x)   (x)
-
-#else
-# error "Unknown configuration"
-#endif // TS_VALGRIND
-
-#define CHECK_GT(X, Y) CHECK((X) >  (Y))
-#define CHECK_LT(X, Y) CHECK((X) < (Y))
-#define CHECK_GE(X, Y) CHECK((X) >= (Y))
-#define CHECK_LE(X, Y) CHECK((X) <= (Y))
-#define CHECK_NE(X, Y) CHECK((X) != (Y))
-#define CHECK_EQ(X, Y) CHECK((X) == (Y))
-
-#if defined(DEBUG) && DEBUG >= 1
-  #define DCHECK(a) CHECK(a)
-  #define DEBUG_MODE (1)
-#else
-  #define DCHECK(a) do { if (0) { if (a) {} } } while((void)0, 0)
-  #define DEBUG_MODE (0)
-#endif
-
-#ifndef ALWAYS_INLINE
-  #if defined (__GNUC__)
-    #define ALWAYS_INLINE  inline __attribute__ ((always_inline))
-  #elif defined(_MSC_VER)
-    #define ALWAYS_INLINE __forceinline
-  #else
-    #error "Unknown Configuration"
-  #endif
-#endif
-
-#if defined(DEBUG) && DEBUG >= 1
-  #define INLINE
-  #define NOINLINE
-#elif defined (__GNUC__)
-  #define INLINE  ALWAYS_INLINE
-  #define NOINLINE __attribute__ ((noinline))
-#elif defined(_MSC_VER)
-  #define INLINE ALWAYS_INLINE
-  #define NOINLINE __declspec(noinline)
-#else
-  #error "Unknown Configuration"
-#endif
-
-// When TS_SERIALIZED==1, all calls to ThreadSanitizer* functions
-// should be serialized somehow. For example:
-//  - Valgrind serializes threads by using a pipe-based semaphore.
-//  - ThreadSanitizerOffline is single-threaded by nature.
-//  - A Multi-threaded environment (e.g. PIN) can use a single global Mutex.
-// When TS_SERIALIZED==0, ThreadSanitizer takes care of synchronization itself.
-
-#if defined(TS_SERIALIZED)
- // someone defined this already, leave it as is.
-#elif defined(TS_PIN)
-# define TS_SERIALIZED 1
-#elif defined(TS_LLVM)
-# define TS_SERIALIZED 0
-#elif defined(TS_GO)
-# define TS_SERIALIZED 0
-#else
-# define TS_SERIALIZED 1
-#endif
-
-
-#define TS_ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-//--------- Malloc profiling ------------------- {{{1
-class MallocCostCenterStack {
- public:
-  void Push(const char *cc) {
-    malloc_cost_centers_[size_++] = cc;
-  }
-  void Pop() {
-    size_--;
-  }
-  const char *Top() {
-    return size_ ? malloc_cost_centers_[size_ - 1] : "default_cc";
-  }
- private:
-  enum { kMaxMallocStackSize = 100 };
-  int size_;
-  const char *malloc_cost_centers_[kMaxMallocStackSize];
-};
-
-// Not thread-safe. Need to make it thread-local if we allow
-// malloc to be called concurrently.
-extern MallocCostCenterStack g_malloc_stack;
-
-class ScopedMallocCostCenter {
- public:
-  ScopedMallocCostCenter(const char *cc) {
-#if defined(TS_VALGRIND)
-    g_malloc_stack.Push(cc);
-#endif
-  }
-  ~ScopedMallocCostCenter() {
-#if defined(TS_VALGRIND)
-    g_malloc_stack.Pop();
-#endif
-  }
-};
-
-//--------- Forward decls ------------------- {{{1
-class ThreadSanitizerReport;
-
-// Time since some moment before the program start.
-extern size_t TimeInMilliSeconds();
-extern void YIELD();
-extern void PROCESSOR_YIELD();
-
-extern "C" long my_strtol(const char *str, char **end, int base);
-extern void Printf(const char *format, ...);
-
-// Strip (.*) and <.*>, also handle "function returns a function pointer" case.
-string NormalizeFunctionName(const string &mangled_fname);
-
-string ReadFileToString(const string &file_name, bool die_if_failed);
-
-// Get the current memory footprint of myself (parse /proc/self/status).
-size_t GetVmSizeInMb();
-
-// Sets the contents of the file 'file_name' to 'str'.
-void OpenFileWriteStringAndClose(const string &file_name, const string &str);
-
-// If host_and_port looks like myhost:12345, open a socket for writing
-// and returns a FILE object. Retuns NULL on failure.
-FILE *OpenSocketForWriting(const string &host_and_port);
-
-// If addr is inside a global object, returns true and sets 'name' and 'offset'
-bool GetNameAndOffsetOfGlobalObject(uintptr_t addr,
-                                    string *name, uintptr_t *offset);
-
-extern uintptr_t GetPcOfCurrentThread();
-
-extern void GetThreadStack(int tid, uintptr_t *min_addr, uintptr_t *max_addr);
-
-extern void SetNumberOfFoundErrors(int n_errs);
-extern int GetNumberOfFoundErrors();
-
-bool LiteRaceSkipTrace(int tid, uint32_t trace_no, uint32_t sampling_rate);
-
-
-inline uintptr_t tsan_bswap(uintptr_t x) {
-#if defined(VGP_arm_linux) && __WORDSIZE == 64
-  return __builtin_bswap64(x);
-#elif defined(VGP_arm_linux) && __WORDSIZE == 32
-  return __builtin_bswap32(x);
-#elif defined(__GNUC__) && __WORDSIZE == 64
-  __asm__("bswapq %0" : "=r" (x) : "0" (x));
-  return x;
-#elif defined(__GNUC__) && __WORDSIZE == 32
-  __asm__("bswapl %0" : "=r" (x) : "0" (x));
-  return x;
-#elif defined(_WIN32)
-  return x;  // TODO(kcc)
-#else
-# error  "Unknown Configuration"
-#endif // arch && VG_WORDSIZE
-}
-
-#ifdef _MSC_VER
-inline unsigned u32_log2(unsigned x) {
-  unsigned long y;
-  _BitScanReverse(&y, x);
-  return y;
-}
-#endif
-
-#ifdef __GNUC__
-inline unsigned u32_log2(unsigned x) {
-  return 31 - __builtin_clz(x);
-}
-#endif
-
-typedef unsigned prng_t;
-
-/// Simple stand-alone pseudorandom number generator.
-/// Current algorithm is ANSI C linear congruential PRNG.
-inline unsigned tsan_prng(prng_t* state) {
-  return (*state = *state * 1103515245 + 12345) >> 16;
-}
-
-
-#endif  // TS_UTIL_H_
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
diff --git a/tsan/ts_valgrind.cc b/tsan/ts_valgrind.cc
deleted file mode 100644
index c72f42d..0000000
--- a/tsan/ts_valgrind.cc
+++ /dev/null
@@ -1,1434 +0,0 @@
-/*
-  This file is part of ThreadSanitizer, a dynamic data race detector
-  based on Valgrind.
-
-  Copyright (C) 2008-2010 Google Inc
-     opensource@google.com
-  Copyright (C) 2007-2008 OpenWorks LLP
-      info@open-works.co.uk
-
-  This program is free software; you can redistribute it and/or
-  modify it under the terms of the GNU General Public License as
-  published by the Free Software Foundation; either version 2 of the
-  License, or (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-  02111-1307, USA.
-
-  The GNU General Public License is contained in the file COPYING.
-*/
-
-// Author: Konstantin Serebryany.
-// Parts of the code in this file are derived from Helgrind,
-// a data race detector written by Julian Seward.
-// Note that the rest of ThreadSanitizer code is not derived from Helgrind
-// and is published under the BSD license.
-
-#include "ts_valgrind.h"
-#include "valgrind.h"
-#include "ts_valgrind_client_requests.h"
-#include "thread_sanitizer.h"
-#include "ts_trace_info.h"
-#include "ts_race_verifier.h"
-#include "common_util.h"
-
-#include "coregrind/pub_core_basics.h"
-#include "coregrind/pub_core_machine.h"
-#include "coregrind/pub_core_clreq.h"
-#include "pub_tool_libcsetjmp.h"
-#include "coregrind/pub_core_threadstate.h"
-#include "pub_tool_libcproc.h"
-
-
-//---------------------- C++ malloc support -------------- {{{1
-void *operator new (size_t size) {
-  return VG_(malloc)((HChar*)g_malloc_stack.Top(), size);
-}
-void *operator new [](size_t size) {
-  return VG_(malloc)((HChar*)g_malloc_stack.Top(), size);
-}
-void operator delete (void *p) {
-  VG_(free)(p);
-}
-void operator delete [](void *p) {
-  VG_(free)(p);
-}
-
-extern "C" void *malloc(size_t size) {
-  return VG_(malloc)((HChar*)g_malloc_stack.Top(), size);
-}
-
-extern "C" void free(void *ptr) {
-  VG_(free)(ptr);
-}
-
-extern "C" void* realloc(void *ptr, size_t size) {
-  return VG_(realloc)((HChar*)g_malloc_stack.Top(), ptr, size);
-}
-
-
-//---------------------- Utils ------------------- {{{1
-
-extern "C" int puts(const char *s) {
-  Printf("%s", s);
-  return 1;
-}
-
-extern "C" void exit(int e) { VG_(exit)(e); }
-
-#ifdef VGO_darwin
-extern "C" void abort() { CHECK(0); }
-#endif
-
-
-// TODO: make this rtn public
-extern "C" {
-  Bool VG_(get_fnname_no_cxx_demangle) ( Addr a, Char* buf, Int nbuf );
-}
-
-
-const int kBuffSize = 1024 * 10 - 1;
-// not thread-safe.
-static char g_buff1[kBuffSize+1];
-static char g_buff2[kBuffSize+1];
-
-string PcToRtnName(uintptr_t pc, bool demangle) {
-  if (demangle) {
-    if(VG_(get_fnname)(pc, (Char*)g_buff1, kBuffSize)) {
-      return g_buff1;
-    }
-  } else {
-    if(VG_(get_fnname_no_cxx_demangle)(pc, (Char*)g_buff1, kBuffSize)) {
-      return g_buff1;
-    }
-  }
-  return "(no symbols)";
-}
-
-void PcToStrings(uintptr_t pc, bool demangle,
-                string *img_name, string *rtn_name,
-                string *file_name, int *line_no) {
-  const int kBuffSize = 1024 * 10 - 1;
-  Bool has_dirname = False;
-
-  if (VG_(get_filename_linenum)
-      (pc, (Char*)g_buff1, kBuffSize, (Char*)g_buff2, kBuffSize,
-       &has_dirname, (UInt*)line_no) &&
-      has_dirname) {
-    *file_name = string(g_buff2) + "/" + g_buff1;
-  } else {
-    VG_(get_linenum)(pc, (UInt *)line_no);
-    if (VG_(get_filename)(pc, (Char*)g_buff1, kBuffSize)) {
-      *file_name = g_buff1;
-    }
-  }
-  *file_name = ConvertToPlatformIndependentPath(*file_name);
-
-  *rtn_name = PcToRtnName(pc, demangle);
-
-  if (VG_(get_objname)(pc, (Char*)g_buff1, kBuffSize)) {
-    *img_name = g_buff1;
-  }
-}
-
-
-
-string Demangle(const char *str) {
-  return str;
-}
-
-extern "C"
-size_t strlen(const char *s) {
-  return VG_(strlen)((const Char*)s);
-}
-
-static inline ThreadId GetVgTid() {
-  extern ThreadId VG_(running_tid); // HACK: avoid calling get_running_tid()
-  ThreadId res = VG_(running_tid);
-  //DCHECK(res == VG_(get_running_tid)());
-  return res;
-}
-
-static inline uintptr_t GetVgPc(ThreadId vg_tid) {
-  Addr pc = VG_(threads)[vg_tid].arch.vex.VG_INSTR_PTR;
-  DCHECK(pc == VG_(get_IP)(vg_tid));
-  return pc;
-  //return (uintptr_t)VG_(get_IP)(vg_tid);
-}
-
-static inline uintptr_t GetVgSp(ThreadId vg_tid) {
-  Addr sp = VG_(threads)[vg_tid].arch.vex.VG_STACK_PTR;
-  DCHECK(sp == VG_(get_SP)(vg_tid));
-  return sp;
-}
-
-#ifdef VGP_arm_linux
-static inline uintptr_t GetVgLr(ThreadId vg_tid) {
-  return (uintptr_t)VG_(threads)[vg_tid].arch.vex.guest_R14;
-}
-#endif
-
-static uintptr_t g_current_pc;
-
-uintptr_t GetPcOfCurrentThread() {
-  return g_current_pc;
-}
-
-void GetThreadStack(int tid, uintptr_t *min_addr, uintptr_t *max_addr) {
-  // tid is not used because we call it from the current thread anyway.
-  uintptr_t stack_max  = VG_(thread_get_stack_max)(GetVgTid());
-  uintptr_t stack_size = VG_(thread_get_stack_size)(GetVgTid());
-  uintptr_t stack_min  = stack_max - stack_size;
-  *min_addr = stack_min;
-  *max_addr = stack_max;
-}
-
-struct CallStackRecord {
-  Addr pc;
-  Addr sp;
-#ifdef VGP_arm_linux
-  // We need to store LR in order to keep the shadow stack consistent.
-  Addr lr;
-#endif
-};
-
-const size_t kMaxMopsPerTrace = 2048;
-
-struct ValgrindThread {
-  int32_t zero_based_uniq_tid;
-  TSanThread *ts_thread;
-  uint32_t literace_sampling;
-  vector<CallStackRecord> call_stack;
-
-  int ignore_accesses;
-  int ignore_sync;
-  int in_signal_handler;
-
-  // thread-local event buffer (tleb).
-  uintptr_t tleb[kMaxMopsPerTrace];
-  TraceInfo *trace_info;
-
-  // PC (as in trace_info->pc()) of the trace currently being verified.
-  // 0 if outside of the verification sleep loop.
-  // -1 in the last iteration of the loop.
-  uintptr_t verifier_current_pc;
-
-  // End time of the current verification loop.
-  unsigned verifier_wakeup_time_ms;
-
-  ValgrindThread() {
-    Clear();
-  }
-
-  void Clear() {
-    ts_thread = NULL;
-    zero_based_uniq_tid = -1;
-    literace_sampling = G_flags->literace_sampling;  // cache it.
-    ignore_accesses = 0;
-    ignore_sync = 0;
-    in_signal_handler = 0;
-    call_stack.clear();
-    trace_info = NULL;
-    verifier_current_pc = 0;
-    verifier_wakeup_time_ms = 0;
-  }
-};
-
-// If true, ignore all accesses in all threads.
-extern bool global_ignore;
-
-// Array of VG_N_THREADS entries, allocated in ts_post_clo_init() and indexed
-// by Valgrind ThreadId.
-static ValgrindThread *g_valgrind_threads = 0;
-// Maps the value passed with TSREQ_SET_MY_PTHREAD_T to a ThreadSanitizer tid.
-static map<uintptr_t, int> *g_ptid_to_ts_tid;
-
-// Source of unique thread ids (the first thread gets id 0).
-static int32_t g_uniq_thread_id_counter = 0;
-
-// Translates a Valgrind ThreadId into the ThreadSanitizer thread id stored
-// in the matching g_valgrind_threads slot. Debug builds verify that the id
-// is in range and that the slot has been assigned a tid.
-static int32_t VgTidToTsTid(ThreadId vg_tid) {
-  DCHECK(vg_tid < VG_N_THREADS);
-  DCHECK(vg_tid >= 1);
-  DCHECK(g_valgrind_threads);
-  const ValgrindThread &slot = g_valgrind_threads[vg_tid];
-  DCHECK(slot.zero_based_uniq_tid >= 0);
-  return slot.zero_based_uniq_tid;
-}
-
-// Tool options collected by ts_process_cmd_line_option().
-static vector<string> *g_command_line_options = 0;
-
-// Lazily creates G_flags and g_command_line_options; safe to call repeatedly.
-static void InitCommandLineOptions() {
-  if (!G_flags)
-    G_flags = new FLAGS;
-  if (!g_command_line_options)
-    g_command_line_options = new vector<string>;
-}
-
-// Records one command line option for later parsing. Every option is
-// accepted here, so the result is always True.
-Bool ts_process_cmd_line_option (Char* arg) {
-  InitCommandLineOptions();
-  const char *option_text = (char*)arg;
-  g_command_line_options->push_back(option_text);
-  return True;
-}
-
-// Prints the tool usage text. The options collected so far are parsed first,
-// before ThreadSanitizerPrintUsage() is called.
-void ts_print_usage (void) {
-  InitCommandLineOptions();
-  ThreadSanitizerParseFlags(g_command_line_options);
-
-  ThreadSanitizerPrintUsage();
-}
-
-// Prints the same usage text as ts_print_usage(), without parsing any flags.
-void ts_print_debug_usage(void) {
-  ThreadSanitizerPrintUsage();
-}
-
-extern int VG_(clo_error_exitcode);
-
-// Initialisation that runs once the command line has been processed:
-// parses the collected flags, imports related settings from Valgrind's own
-// options, prints the banner, initialises ThreadSanitizer and allocates the
-// per-thread tables.
-void ts_post_clo_init(void) {
-  ScopedMallocCostCenter malloc_cc(__FUNCTION__);
-  InitCommandLineOptions();
-  ThreadSanitizerParseFlags(g_command_line_options);
-
-  // we get num-callers from valgrind flags.
-  G_flags->num_callers = VG_(clo_backtrace_size);
-  // An exit code given to ThreadSanitizer takes precedence over Valgrind's.
-  if (!G_flags->error_exitcode)
-    G_flags->error_exitcode = VG_(clo_error_exitcode);
-
-  extern Int   VG_(clo_n_suppressions);
-  extern Int   VG_(clo_gen_suppressions);
-  extern Char* VG_(clo_suppressions)[];
-  extern Int   VG_(clo_n_fullpath_after);
-  extern Char* VG_(clo_fullpath_after)[];
-  // get the suppressions from Valgrind
-  for (int i = 0; i < VG_(clo_n_suppressions); i++) {
-    G_flags->suppressions.push_back((char*)VG_(clo_suppressions)[i]);
-  }
-  // get the --fullpath-after prefixes from Valgrind and treat them as
-  // --file-prefix-to-cut arguments.
-  for (int i = 0; i < VG_(clo_n_fullpath_after); i++) {
-    G_flags->file_prefix_to_cut.push_back((char*)VG_(clo_fullpath_after)[i]);
-  }
-  G_flags->generate_suppressions |= VG_(clo_gen_suppressions) >= 1;
-
-  if (G_flags->html) {
-    Report("<pre>\n"
-           "<br id=race0>"
-           "<a href=\"#race1\">Go to first race report</a>\n");
-  }
-  Report("ThreadSanitizerValgrind r%s: %s\n",
-         TS_VERSION,
-         G_flags->pure_happens_before ? "hybrid=no" : "hybrid=yes");
-  if (DEBUG_MODE) {
-    Report("INFO: Debug build\n");
-  }
-  if (G_flags->max_mem_in_mb) {
-    Report("INFO: ThreadSanitizer memory limit: %dMB\n",
-           (int)G_flags->max_mem_in_mb);
-  }
-  ThreadSanitizerInit();
-
-  g_valgrind_threads = new ValgrindThread[VG_N_THREADS];
-  g_ptid_to_ts_tid = new map<uintptr_t, int>;
-
-  // In race verifier mode all accesses are ignored globally.
-  if (g_race_verifier_active) {
-    RaceVerifierInit(G_flags->race_verifier, G_flags->race_verifier_extra);
-    global_ignore = true;
-  }
-}
-
-// Valgrind runs client threads one at a time. Whenever client code is about
-// to run for a thread, g_cur_tleb is pointed at that thread's event buffer,
-// so the buffer address can be loaded with a single instruction.
-static uintptr_t *g_cur_tleb;
-
-// Makes the tleb of |vg_tid| the current one. |nDisp| is unused.
-static void OnStartClientCode(ThreadId vg_tid, ULong nDisp) {
-  g_cur_tleb = g_valgrind_threads[vg_tid].tleb;
-}
-
-// Hands the addresses buffered in thr->tleb for the current trace to
-// ThreadSanitizerHandleTrace(). Does nothing if the thread has no trace.
-//   keep_trace_info - when false, thr->trace_info is cleared up front.
-// When the trace is skipped (global ignore, per-thread ignore, or LiteRace
-// sampling) thr->trace_info is cleared regardless of keep_trace_info.
-INLINE void FlushMops(ValgrindThread *thr, bool keep_trace_info = false) {
-  DCHECK(!g_race_verifier_active || global_ignore);
-  TraceInfo *t = thr->trace_info;
-  if (!t) return;
-  if (!keep_trace_info) {
-    thr->trace_info = NULL;
-  }
-
-  if (global_ignore || thr->ignore_accesses ||
-       (thr->literace_sampling &&
-        t->LiteRaceSkipTraceRealTid(thr->zero_based_uniq_tid, thr->literace_sampling))) {
-    thr->trace_info = NULL;
-    return;
-  }
-
-  // n is only consulted by the debug check below.
-  size_t n = t->n_mops();
-  DCHECK(n > 0);
-  uintptr_t *tleb = thr->tleb;
-  DCHECK(thr->ts_thread);
-  ThreadSanitizerHandleTrace(thr->ts_thread, t, tleb);
-}
-
-// Debug helper: prints the innermost frames of the shadow call stack of
-// |thr| (at most kMaxFramesShown), starting with the most recent one.
-//
-// The frame count is clamped explicitly. The previous loop compared unsigned
-// values against "n - 10" and "i >= 0", which printed nothing for stacks
-// with fewer than 10 frames and indexed out of bounds for an empty stack.
-static void ShowCallStack(ValgrindThread *thr) {
-  const size_t kMaxFramesShown = 9;
-  const size_t n = thr->call_stack.size();
-  const size_t shown = n < kMaxFramesShown ? n : kMaxFramesShown;
-  Printf("        ");
-  for (size_t k = 1; k <= shown; k++) {
-    const CallStackRecord &frame = thr->call_stack[n - k];
-    Printf("{pc=%p sp=%p}, ", (void*)frame.pc, (void*)frame.sp);
-  }
-  Printf("\n");
-}
-
-// Unwinds the shadow call stack of |thr| to match the stack pointer |sp|:
-// every frame whose recorded SP is not above |sp| is popped and reported to
-// ThreadSanitizer as a routine exit. Pending memory operations are flushed
-// first (keeping trace_info) so they are attributed before frames are popped.
-static INLINE void UpdateCallStack(ValgrindThread *thr, uintptr_t sp) {
-  DCHECK(!g_race_verifier_active);
-  if (thr->trace_info) FlushMops(thr, true /* keep_trace_info */);
-  vector<CallStackRecord> &call_stack = thr->call_stack;
-  while (!call_stack.empty()) {
-    // Copy the frame: pop_back() destroys the element, so a reference to
-    // back() must not be used afterwards (the debug print below needs it).
-    const CallStackRecord record = call_stack.back();
-    if (sp < record.sp) break;
-    call_stack.pop_back();
-    int32_t ts_tid = thr->zero_based_uniq_tid;
-    ThreadSanitizerHandleRtnExit(ts_tid);
-    if (debug_rtn) {
-      // Cast explicitly so the arguments match the %ld / %p conversions.
-      Printf("T%d: [%ld]<< pc=%p sp=%p cur_sp=%p %s\n",
-             ts_tid, (long)call_stack.size(), (void*)record.pc,
-             (void*)record.sp, (void*)sp,
-             PcToRtnNameAndFilePos(record.pc).c_str());
-      ShowCallStack(thr);
-    }
-  }
-}
-
-// Callback executed at the start of every instrumented trace (not used in
-// race verifier mode). Flushes the accesses of the previous trace, brings
-// the shadow call stack up to date and prepares the thread's tleb for
-// |trace_info|.
-VG_REGPARM(1)
-static void OnTrace(TraceInfo *trace_info) {
-  DCHECK(!g_race_verifier_active);
-  if (global_ignore) return;
-  ThreadId vg_tid = GetVgTid();
-  ValgrindThread *thr = &g_valgrind_threads[vg_tid];
-
-  // First, flush the old trace_info.
-  if (thr->trace_info) {
-    FlushMops(thr);
-  }
-
-  UpdateCallStack(thr, GetVgSp(vg_tid));
-
-  // Validate the new trace before touching the buffer, so that an oversized
-  // trace is caught in debug builds before tleb is overrun.
-  DCHECK(trace_info);
-  size_t n = trace_info->n_mops();
-  DCHECK(n <= kMaxMopsPerTrace);
-
-  // Start the new trace, zero the contents of tleb.
-  uintptr_t *tleb = thr->tleb;
-  for (size_t i = 0; i < n; i++)
-    tleb[i] = 0;
-  thr->trace_info = trace_info;
-}
-
-// Builds an Event from the arguments and passes it to ThreadSanitizer.
-// In debug builds the event is dropped when G_flags->dry_run >= 1.
-static inline void Put(EventType type, int32_t tid, uintptr_t pc,
-                       uintptr_t a, uintptr_t info) {
-  const bool dropped_by_dry_run = DEBUG_MODE && G_flags->dry_run >= 1;
-  if (!dropped_by_dry_run) {
-    Event event(type, tid, pc, a, info);
-    ThreadSanitizerHandleOneEvent(&event);
-  }
-}
-
-// Common handler for instrumented call instructions: pushes a frame onto the
-// shadow call stack and reports the call to ThreadSanitizer.
-//   sp_post_call_insn - stack pointer passed by the instrumentation.
-//   pc_post_call_insn - stored as the frame's pc and passed to
-//                       ThreadSanitizerHandleRtnCall().
-//   ignore_below      - forwarded unchanged to ThreadSanitizerHandleRtnCall().
-static void rtn_call(Addr sp_post_call_insn, Addr pc_post_call_insn,
-                     IGNORE_BELOW_RTN ignore_below) {
-  DCHECK(!g_race_verifier_active);
-  if (global_ignore) return;
-  ThreadId vg_tid = GetVgTid();
-  ValgrindThread *thr = &g_valgrind_threads[vg_tid];
-  int ts_tid = thr->zero_based_uniq_tid;
-  CallStackRecord record;
-  record.pc = pc_post_call_insn;
-  // NOTE(review): the adjustment is a fixed 4 bytes and is not scaled by the
-  // word size -- confirm this is intended for 64-bit guests.
-  record.sp = sp_post_call_insn + 4;  // sp before call.
-  // Drop frames that have already returned before pushing the new one.
-  UpdateCallStack(thr, record.sp);
-#ifdef VGP_arm_linux
-  record.lr = GetVgLr(vg_tid);
-#endif
-  thr->call_stack.push_back(record);
-  // If the shadow stack grows too high this usually means it is not cleaned
-  // properly. Or this may be a very deep recursion.
-  DCHECK(thr->call_stack.size() < 10000);
-  uintptr_t call_pc = GetVgPc(vg_tid);
-  if (thr->trace_info) FlushMops(thr);
-  ThreadSanitizerHandleRtnCall(ts_tid, call_pc, record.pc,
-                               ignore_below);
-
-  if (debug_rtn) {
-    Printf("T%d: [%ld]>> pc=%p sp=%p %s\n",
-           ts_tid, thr->call_stack.size(), (void*)record.pc,
-           (void*)record.sp,
-           PcToRtnNameAndFilePos(record.pc).c_str());
-    ShowCallStack(thr);
-  }
-}
-
-// Entry points called from instrumented code for call instructions. Each one
-// forwards to rtn_call() with a fixed IGNORE_BELOW_RTN value, which
-// ts_instrument_final_jump() selects at instrumentation time.
-VG_REGPARM(2) void evh__rtn_call_ignore_unknown ( Addr sp, Addr pc) {
-  rtn_call(sp, pc, IGNORE_BELOW_RTN_UNKNOWN);
-}
-VG_REGPARM(2) void evh__rtn_call_ignore_yes ( Addr sp, Addr pc) {
-  rtn_call(sp, pc, IGNORE_BELOW_RTN_YES);
-}
-VG_REGPARM(2) void evh__rtn_call_ignore_no ( Addr sp, Addr pc) {
-  rtn_call(sp, pc, IGNORE_BELOW_RTN_NO);
-}
-
-#ifdef VGP_arm_linux
-// Handle shadow stack frame deletion on ARM.
-// Instrumented code calls this function for each non-call jump out of
-// a superblock. If |pc_post_call_insn| (the jump target address) is equal
-// to a link register value of one or more frames on top of the shadow stack,
-// those frames are popped out. |sp_post_call_insn| is not used.
-// TODO(glider): there may be problems with optimized recursive functions that
-// don't change PC, SP and LR.
-VG_REGPARM(2)
-void evh__delete_frame ( Addr sp_post_call_insn,
-                         Addr pc_post_call_insn) {
-  DCHECK(!g_race_verifier_active);
-  ThreadId vg_tid = GetVgTid();
-  ValgrindThread *thr = &g_valgrind_threads[vg_tid];
-  if (thr->trace_info) FlushMops(thr);
-  vector<CallStackRecord> &call_stack = thr->call_stack;
-  int32_t ts_tid = VgTidToTsTid(vg_tid);
-  while (!call_stack.empty()) {
-    CallStackRecord &record = call_stack.back();
-    // Stop at the first frame whose saved LR is not the jump target.
-    if (record.lr != pc_post_call_insn) break;
-    call_stack.pop_back();
-    ThreadSanitizerHandleRtnExit(ts_tid);
-  }
-}
-#endif
-
-// Tool shutdown hook. Finalises ThreadSanitizer (and the race verifier when
-// it is active). If an error exit code is configured and errors were found,
-// the process exits with that code. |exitcode| itself is not used.
-void ts_fini(Int exitcode) {
-  ThreadSanitizerFini();
-  if (g_race_verifier_active)
-    RaceVerifierFini();
-
-  const bool exit_with_error_code =
-      G_flags->error_exitcode && GetNumberOfFoundErrors() > 0;
-  if (exit_with_error_code)
-    exit(G_flags->error_exitcode);
-}
-
-
-// Thread creation hook: resets the slot of |child|, assigns it a fresh
-// unique tid, sends THR_START to ThreadSanitizer and caches the resulting
-// thread object. A warning is printed if the slot still carried a tid.
-void evh__pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
-  tl_assert(parent != child);
-  ValgrindThread *thr = &g_valgrind_threads[child];
-  //  Printf("thread_create: %d->%d\n", parent, child);
-  if (thr->zero_based_uniq_tid != -1) {
-    Printf("ThreadSanitizer WARNING: reusing TID %d w/o exiting thread\n",
-           child);
-  }
-  thr->Clear();
-  thr->zero_based_uniq_tid = g_uniq_thread_id_counter++;
-  // Printf("VG: T%d: VG_THR_START: parent=%d\n", VgTidToTsTid(child), VgTidToTsTid(parent));
-  // A parent id of 0 is reported to ThreadSanitizer as parent tid 0.
-  Put(THR_START, VgTidToTsTid(child), 0, 0,
-      parent > 0 ? VgTidToTsTid(parent) : 0);
-  // The thread object is looked up only after THR_START has been delivered.
-  thr->ts_thread = ThreadSanitizerGetThreadByTid(thr->zero_based_uniq_tid);
-  CHECK(thr->ts_thread);
-}
-
-// Work-queue hook: flushes pending memory operations of the thread and then
-// reports a WAIT event on |workitem|.
-void evh__pre_workq_task_start(ThreadId vg_tid, Addr workitem) {
-  const uintptr_t cur_pc = GetVgPc(vg_tid);
-  const int32_t tid = VgTidToTsTid(vg_tid);
-  FlushMops(&g_valgrind_threads[vg_tid]);
-  Put(WAIT, tid, cur_pc, workitem, 0);
-}
-
-// Hook for the first instruction of a thread: flushes pending memory
-// operations and reports THR_FIRST_INSN with the thread's current PC.
-void evh__pre_thread_first_insn(const ThreadId vg_tid) {
-  FlushMops(&g_valgrind_threads[vg_tid]);
-  const int32_t tid = VgTidToTsTid(vg_tid);
-  const uintptr_t first_pc = GetVgPc(vg_tid);
-  Put(THR_FIRST_INSN, tid, first_pc, 0, 0);
-}
-
-
-// Thread exit hook: flushes pending memory operations, reports THR_END and
-// marks the slot of |quit_tid| as unused (tid -1).
-void evh__pre_thread_ll_exit ( ThreadId quit_tid ) {
-  ValgrindThread *exiting = &g_valgrind_threads[quit_tid];
-  FlushMops(exiting);
-  const int32_t tid = VgTidToTsTid(quit_tid);
-  Put(THR_END, tid, 0, 0, 0);
-  exiting->zero_based_uniq_tid = -1;
-}
-
-  // Valgrind core routine, called by the TSREQ_MAIN_OUT handler before exit.
-  extern "C" void VG_(show_all_errors)();
-
-// Tells whether synchronization events of thread |vg_tid| on address |addr|
-// are to be dropped. That is the case when the thread has ignore_sync set
-// and is not inside a signal handler, or when ThreadSanitizerIgnoreForNacl()
-// accepts the address.
-static inline Bool ignoring_sync(ThreadId vg_tid, uintptr_t addr) {
-  const ValgrindThread &thr = g_valgrind_threads[vg_tid];
-  if (thr.ignore_sync && !thr.in_signal_handler)
-    return True;
-  return ThreadSanitizerIgnoreForNacl(addr) ? True : False;
-}
-
-// Dispatches a client request issued by the program under test.
-//   vg_tid - Valgrind id of the requesting thread.
-//   args   - args[0] is the request code, the remaining words are operands.
-//   ret    - receives the request result (0 unless a case sets it).
-// Returns False if the request is not addressed to this tool, True otherwise.
-// An unknown request code addressed to this tool trips CHECK(0).
-Bool ts_handle_client_request(ThreadId vg_tid, UWord* args, UWord* ret) {
-  // Handled before the tool-request filter below.
-  if (args[0] == VG_USERREQ__NACL_MEM_START) {
-    // This will get truncated on x86-32, but we don't support it with NaCl
-    // anyway.
-    const uintptr_t kFourGig = (uintptr_t)0x100000000ULL;
-    uintptr_t mem_start = args[1];
-    uintptr_t mem_end = mem_start + kFourGig;
-    ThreadSanitizerNaclUntrustedRegion(mem_start, mem_end);
-    return True;
-  }
-  if (!VG_IS_TOOL_USERREQ('T', 'S', args[0]))
-    return False;
-  int32_t ts_tid = VgTidToTsTid(vg_tid);
-  // Ignore almost everything in race verifier mode.
-  if (g_race_verifier_active) {
-    if (args[0] == TSREQ_EXPECT_RACE) {
-      Put(EXPECT_RACE, ts_tid, /*descr=*/args[2],
-          /*p=*/args[1], 0);
-    }
-    *ret = 0;
-    return True;
-  }
-  // Bring ThreadSanitizer's view of this thread up to date before the
-  // request itself is processed.
-  ValgrindThread *thr = &g_valgrind_threads[vg_tid];
-  if (thr->trace_info) FlushMops(thr);
-  UpdateCallStack(thr, GetVgSp(vg_tid));
-  *ret = 0;
-  uintptr_t pc = GetVgPc(vg_tid);
-  switch (args[0]) {
-    case TSREQ_SET_MY_PTHREAD_T:
-      (*g_ptid_to_ts_tid)[args[1]] = ts_tid;
-      break;
-    case TSREQ_THR_STACK_TOP:
-      Put(THR_STACK_TOP, ts_tid, pc, args[1], 0);
-      break;
-    case TSREQ_PTHREAD_JOIN_POST:
-      // operator[] yields 0 for a value that was never registered.
-      Put(THR_JOIN_AFTER, ts_tid, pc, (*g_ptid_to_ts_tid)[args[1]], 0);
-      break;
-    case TSREQ_CLEAN_MEMORY:
-      Put(MALLOC, ts_tid, pc, /*ptr=*/args[1], /*size=*/args[2]);
-      break;
-    case TSREQ_MAIN_IN:
-      g_has_entered_main = true;
-      // Report("INFO: Entred main(); argc=%d\n", (int)args[1]);
-      break;
-    case TSREQ_MAIN_OUT:
-      g_has_exited_main = true;
-      if (G_flags->exit_after_main) {
-        Report("INFO: Exited main(); ret=%d\n", (int)args[1]);
-        VG_(show_all_errors)();
-        ThreadSanitizerFini();
-        if (g_race_verifier_active) {
-          RaceVerifierFini();
-        }
-        exit((int)args[1]);
-      }
-      break;
-    case TSREQ_MALLOC:
-      // Printf("Malloc: %p %ld\n", args[1], args[2]);
-      Put(MALLOC, ts_tid, pc, /*ptr=*/args[1], /*size=*/args[2]);
-      break;
-    case TSREQ_FREE:
-      // Printf("Free: %p\n", args[1]);
-      Put(FREE, ts_tid, pc, /*ptr=*/args[1], 0);
-      break;
-    case TSREQ_MMAP:
-      Put(MMAP, ts_tid, pc, /*ptr=*/args[1], /*size=*/args[2]);
-      break;
-    case TSREQ_MUNMAP:
-      Put(MUNMAP, ts_tid, pc, /*ptr=*/args[1], /*size=*/args[2]);
-      break;
-    case TSREQ_BENIGN_RACE:
-      Put(BENIGN_RACE, ts_tid, /*descr=*/args[3],
-          /*p=*/args[1], /*size=*/args[2]);
-      break;
-    case TSREQ_EXPECT_RACE:
-      Put(EXPECT_RACE, ts_tid, /*descr=*/args[2], /*p=*/args[1], 0);
-      break;
-    case TSREQ_FLUSH_EXPECTED_RACES:
-      Put(FLUSH_EXPECTED_RACES, ts_tid, 0, 0, 0);
-      break;
-    case TSREQ_PCQ_CREATE:
-      Put(PCQ_CREATE, ts_tid, pc, /*pcq=*/args[1], 0);
-      break;
-    case TSREQ_PCQ_DESTROY:
-      Put(PCQ_DESTROY, ts_tid, pc, /*pcq=*/args[1], 0);
-      break;
-    case TSREQ_PCQ_PUT:
-      Put(PCQ_PUT, ts_tid, pc, /*pcq=*/args[1], 0);
-      break;
-    case TSREQ_PCQ_GET:
-      Put(PCQ_GET, ts_tid, pc, /*pcq=*/args[1], 0);
-      break;
-    case TSREQ_TRACE_MEM:
-      Put(TRACE_MEM, ts_tid, pc, /*mem=*/args[1], 0);
-      break;
-    case TSREQ_MUTEX_IS_USED_AS_CONDVAR:
-      Put(HB_LOCK, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_MUTEX_IS_NOT_PHB:
-      Put(NON_HB_LOCK, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_GLOBAL_IGNORE_ON:
-      Report("INFO: GLOBAL IGNORE ON\n");
-      global_ignore = true;
-      break;
-    case TSREQ_GLOBAL_IGNORE_OFF:
-      Report("INFO: GLOBAL IGNORE OFF\n");
-      global_ignore = false;
-      break;
-    case TSREQ_IGNORE_READS_BEGIN:
-      Put(IGNORE_READS_BEG, ts_tid, pc, 0, 0);
-      break;
-    case TSREQ_IGNORE_READS_END:
-      Put(IGNORE_READS_END, ts_tid, pc, 0, 0);
-      break;
-    case TSREQ_IGNORE_WRITES_BEGIN:
-      Put(IGNORE_WRITES_BEG, ts_tid, pc, 0, 0);
-      break;
-    case TSREQ_IGNORE_WRITES_END:
-      Put(IGNORE_WRITES_END, ts_tid, pc, 0, 0);
-      break;
-    case TSREQ_SET_THREAD_NAME:
-      Put(SET_THREAD_NAME, ts_tid, pc, /*name=*/args[1], 0);
-      break;
-    case TSREQ_SET_STACKTOP_STACKSIZE:
-      Put(THR_STACK_TOP, ts_tid, pc, /*addr=*/args[1], /*size=*/args[2]);
-      break;
-    // The IGNORE_ALL_* requests are nesting counters kept per thread.
-    case TSREQ_IGNORE_ALL_ACCESSES_BEGIN:
-      g_valgrind_threads[vg_tid].ignore_accesses++;
-      break;
-    case TSREQ_IGNORE_ALL_ACCESSES_END:
-      g_valgrind_threads[vg_tid].ignore_accesses--;
-      CHECK(g_valgrind_threads[vg_tid].ignore_accesses >= 0);
-      break;
-    case TSREQ_IGNORE_ALL_SYNC_BEGIN:
-      g_valgrind_threads[vg_tid].ignore_sync++;
-      break;
-    case TSREQ_IGNORE_ALL_SYNC_END:
-      g_valgrind_threads[vg_tid].ignore_sync--;
-      CHECK(g_valgrind_threads[vg_tid].ignore_sync >= 0);
-      break;
-    case TSREQ_PUBLISH_MEMORY_RANGE:
-      Put(PUBLISH_RANGE, ts_tid, pc, /*mem=*/args[1], /*size=*/args[2]);
-      break;
-    case TSREQ_UNPUBLISH_MEMORY_RANGE:
-      Put(UNPUBLISH_RANGE, ts_tid, pc, /*mem=*/args[1], /*size=*/args[2]);
-      break;
-    // Accepted but deliberately without effect.
-    case TSREQ_PRINT_MEMORY_USAGE:
-    case TSREQ_PRINT_STATS:
-    case TSREQ_RESET_STATS:
-    case TSREQ_PTH_API_ERROR:
-      break;
-    // Lock and signal/wait events are dropped while ignoring_sync() holds.
-    case TSREQ_PTHREAD_RWLOCK_CREATE_POST:
-      if (ignoring_sync(vg_tid, args[1]))
-        break;
-      Put(LOCK_CREATE, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_PTHREAD_RWLOCK_DESTROY_PRE:
-      if (ignoring_sync(vg_tid, args[1]))
-        break;
-      Put(LOCK_DESTROY, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_PTHREAD_RWLOCK_LOCK_POST:
-      if (ignoring_sync(vg_tid, args[1]))
-        break;
-      Put(args[2] ? WRITER_LOCK : READER_LOCK, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE:
-      if (ignoring_sync(vg_tid, args[1]))
-        break;
-      Put(UNLOCK, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_PTHREAD_SPIN_LOCK_INIT_OR_UNLOCK:
-      Put(UNLOCK_OR_INIT, ts_tid, pc, /*lock=*/args[1], 0);
-      break;
-    case TSREQ_POSIX_SEM_INIT_POST:
-    case TSREQ_POSIX_SEM_DESTROY_PRE:
-      break;
-    case TSREQ_SIGNAL:
-      if (ignoring_sync(vg_tid, args[1]))
-        break;
-      Put(SIGNAL, ts_tid, pc, args[1], 0);
-      break;
-    case TSREQ_WAIT:
-      if (ignoring_sync(vg_tid, args[1]))
-        break;
-      Put(WAIT, ts_tid, pc, args[1], 0);
-      break;
-    case TSREQ_CYCLIC_BARRIER_INIT:
-      Put(CYCLIC_BARRIER_INIT, ts_tid, pc, args[1], args[2]);
-      break;
-    case TSREQ_CYCLIC_BARRIER_WAIT_BEFORE:
-      Put(CYCLIC_BARRIER_WAIT_BEFORE, ts_tid, pc, args[1], 0);
-      break;
-    case TSREQ_CYCLIC_BARRIER_WAIT_AFTER:
-      Put(CYCLIC_BARRIER_WAIT_AFTER, ts_tid, pc, args[1], 0);
-      break;
-    case TSREQ_GET_MY_SEGMENT:
-      break;
-    case TSREQ_GET_THREAD_ID:
-      *ret = ts_tid;
-      break;
-    case TSREQ_GET_VG_THREAD_ID:
-      *ret = vg_tid;
-      break;
-    case TSREQ_GET_SEGMENT_ID:
-      break;
-    case TSREQ_THREAD_SANITIZER_QUERY:
-      *ret = (UWord)ThreadSanitizerQuery((const char *)args[1]);
-      break;
-    case TSREQ_FLUSH_STATE:
-      Put(FLUSH_STATE, ts_tid, pc, 0, 0);
-      break;
-    default: CHECK(0);
-  }
-  return True;
-}
-
-// Signal delivery hook: notes that thread |vg_tid| has entered a signal
-// handler. Debug builds assert that handlers do not nest. |sigNo| and
-// |alt_stack| are unused.
-static void SignalIn(ThreadId vg_tid, Int sigNo, Bool alt_stack) {
-  ValgrindThread &thr = g_valgrind_threads[vg_tid];
-  ++thr.in_signal_handler;
-  DCHECK(thr.in_signal_handler == 1);
-}
-
-// Signal return hook: notes that thread |vg_tid| has left a signal handler.
-// The counter must never go negative; debug builds additionally assert that
-// it is back to zero. |sigNo| is unused.
-static void SignalOut(ThreadId vg_tid, Int sigNo) {
-  ValgrindThread &thr = g_valgrind_threads[vg_tid];
-  --thr.in_signal_handler;
-  CHECK(thr.in_signal_handler >= 0);
-  DCHECK(thr.in_signal_handler == 0);
-}
-
-
-// ---------------------------- RaceVerifier    ---------------------------{{{1
-
-/**
- * In race verifier mode _every_ IRSB is instrumented with a sleep loop at the
- * beginning (but, of course, in most cases it is not executed).
- * Its code logically looks like
- *  irsb_start:
- *   bool need_sleep = OnTraceVerify1();
- *   if (need_sleep) {
- *     sched_yield();
- *     goto irsb_start;
- *   }
- *   OnTraceVerify2(trace_info);
- *
- * This loop verifies mops from the _previous_ trace_info and sets up the new
- * trace info in OnTraceVerify2. Only IRSBs with "interesting" mops have
- * non-zero trace_info.
- */
-
-/**
- * Race verification loop.
- * On the first pass (for a trace_info), if there are mops to be verified,
- * register them with RaceVerifier and calculate the wake up time.
- * On the following passes, check the wake up time against the clock.
- * The loop state is kept in ValgrindThread.
- * Returns true if need to sleep more, false if the loop must be ended.
- */
-// Sleep-loop predicate of the race verifier; takes no arguments.
-// Returns 1 when the caller should yield and re-enter the superblock, and 0
-// when it should proceed to OnTraceVerify2().
-VG_REGPARM(1)
-static uint32_t OnTraceVerify1() {
-  DCHECK(g_race_verifier_active);
-  ThreadId vg_tid = GetVgTid();
-
-  // First, flush the old trace_info.
-  ValgrindThread *thr = &g_valgrind_threads[vg_tid];
-
-  // thr->trace_info is the trace info for the previous superblock.
-  if (!thr->trace_info)
-    // Nothing to do here.
-    return 0;
-
-  if (!thr->verifier_current_pc) {
-    // This is the first iteration of the sleep loop.
-    // Register memory accesses.
-    int sleep_time_ms = RaceVerifierGetSleepTime(thr->trace_info->pc());
-    if (!sleep_time_ms) {
-      // No sleep requested for this trace: drop it without registering.
-      thr->trace_info = NULL;
-      return 0;
-    }
-    size_t n = thr->trace_info->n_mops();
-    uintptr_t* tleb = thr->tleb;
-    int need_sleep = 0;
-    for (size_t i = 0; i < n; ++i) {
-      uintptr_t addr = tleb[i];
-      // A zero slot means no address was recorded for this mop.
-      if (addr) {
-        MopInfo *mop = thr->trace_info->GetMop(i);
-        need_sleep += RaceVerifierStartAccess(thr->zero_based_uniq_tid, addr,
-            mop->pc(), mop->is_write());
-      }
-    }
-    // Setup the sleep timer.
-    thr->verifier_current_pc = thr->trace_info->pc();
-    if (need_sleep) {
-      unsigned now = VG_(read_millisecond_timer)();
-      thr->verifier_wakeup_time_ms = now + sleep_time_ms;
-      return 1;
-    } else {
-      // (unsigned)-1 marks the last iteration; OnTraceVerify2() checks it.
-      thr->verifier_current_pc = (unsigned)-1;
-      return 0;
-    }
-  } else {
-    // Continuation of the sleep loop.
-    DCHECK(thr->verifier_current_pc == thr->trace_info->pc());
-    unsigned now = VG_(read_millisecond_timer)();
-    if (now < thr->verifier_wakeup_time_ms) {
-      // sleep more
-      return 1;
-    } else {
-      // done, go straight to OnTraceVerify2
-      thr->verifier_current_pc = (unsigned)-1;
-      return 0;
-    }
-  }
-}
-
-/**
- * Race verification loop exit.
- * Unregisters mops with the RaceVerifier.
- * Sets up the new trace_info.
- */
-// Exit of the race verification loop. Resets the loop state, unregisters the
-// accesses of the previous trace from the RaceVerifier and installs
-// |trace_info| (which may be NULL) as the thread's current trace.
-VG_REGPARM(1)
-static void OnTraceVerify2(TraceInfo *trace_info) {
-  DCHECK(g_race_verifier_active);
-  ThreadId vg_tid = GetVgTid();
-  ValgrindThread *thr = &g_valgrind_threads[vg_tid];
-
-  DCHECK(!thr->trace_info || thr->verifier_current_pc == (unsigned)-1);
-  thr->verifier_current_pc = 0;
-  thr->verifier_wakeup_time_ms = 0;
-
-  if (thr->trace_info) {
-    // Unregister accesses from the old trace_info.
-    size_t n = thr->trace_info->n_mops();
-    uintptr_t* tleb = thr->tleb;
-    for (size_t i = 0; i < n; ++i) {
-      uintptr_t addr = tleb[i];
-      if (addr) {
-        MopInfo *mop = thr->trace_info->GetMop(i);
-        RaceVerifierEndAccess(thr->zero_based_uniq_tid, addr,
-            mop->pc(), mop->is_write());
-      }
-    }
-  }
-
-  // Start the new trace, zero the contents of tleb.
-  thr->trace_info = trace_info;
-  if (trace_info) {
-    size_t n = trace_info->n_mops();
-    // Check the bound before writing, so that an oversized trace is caught
-    // in debug builds before tleb is overrun.
-    DCHECK(n <= kMaxMopsPerTrace);
-    uintptr_t *tleb = thr->tleb;
-    for (size_t i = 0; i < n; i++)
-      tleb[i] = 0;
-  }
-}
-
-/**
- * Add a race verification preamble to the IRSB.
- */
-// Emits, into |bbOut|:
-//   1. a dirty call to OnTraceVerify1() whose 32-bit result is captured;
-//   2. a conditional exit (Ijk_YieldNoRedir) back to |cur_pc| taken when the
-//      result is non-zero;
-//   3. a dirty call to OnTraceVerify2(trace_info).
-// |layout| supplies the guest instruction pointer width for the exit target.
-static void ts_instrument_trace_entry_verify(IRSB *bbOut,
-    VexGuestLayout* layout, TraceInfo *trace_info, uintptr_t cur_pc) {
-   HChar*   hName = (HChar*)"OnTraceVerify1";
-   void *callback = (void*)OnTraceVerify1;
-   IRExpr **args = mkIRExprVec_0();
-   IRTemp need_sleep = newIRTemp(bbOut->tyenv, Ity_I32);
-   IRDirty* di = unsafeIRDirty_1_N(need_sleep, 0, hName,
-       VG_(fnptr_to_fnentry)(callback), args);
-   addStmtToIRSB( bbOut, IRStmt_Dirty(di));
-
-   // Turn the 32-bit result into the 1-bit guard required by IRStmt_Exit.
-   IRTemp need_sleep_i1 = newIRTemp(bbOut->tyenv, Ity_I1);
-   IRStmt* cmp_stmt = IRStmt_WrTmp(need_sleep_i1,
-       IRExpr_Binop(Iop_CmpNE32,
-           IRExpr_RdTmp(need_sleep),
-           IRExpr_Const(IRConst_U32(0))));
-   addStmtToIRSB(bbOut, cmp_stmt);
-
-   // The exit target constant must match the guest IP width.
-   IRConst* exit_dst = layout->sizeof_IP == 8 ?
-       IRConst_U64(cur_pc) : IRConst_U32(cur_pc);
-   IRStmt* exit_stmt = IRStmt_Exit(IRExpr_RdTmp(need_sleep_i1),
-       Ijk_YieldNoRedir, exit_dst);
-   addStmtToIRSB(bbOut, exit_stmt);
-
-   hName = (HChar*)"OnTraceVerify2";
-   callback = (void*)OnTraceVerify2;
-   args = mkIRExprVec_1(mkIRExpr_HWord((HWord)trace_info));
-   di = unsafeIRDirty_0_N(1, hName, VG_(fnptr_to_fnentry)(callback), args);
-   addStmtToIRSB( bbOut, IRStmt_Dirty(di));
-}
-
-
-// ---------------------------- Instrumentation ---------------------------{{{1
-
-// Appends to |bbOut| a statement that reads the guest stack pointer into a
-// fresh temporary and returns that temporary. Asserts that the host word
-// size |hWordTy_szB| equals the guest SP size given by |layout|.
-static IRTemp gen_Get_SP ( IRSB*           bbOut,
-                           VexGuestLayout* layout,
-                           Int             hWordTy_szB )
-{
-  /* Host and guest word sizes are required to be the same. */
-  tl_assert(hWordTy_szB == layout->sizeof_SP);
-
-  const IRType sp_ty = (layout->sizeof_SP == 8) ? Ity_I64 : Ity_I32;
-  IRExpr* read_sp = IRExpr_Get( layout->offset_SP, sp_ty );
-  IRTemp  result  = newIRTemp( bbOut->tyenv, sp_ty );
-  addStmtToIRSB( bbOut, IRStmt_WrTmp( result, read_sp ) );
-  return result;
-}
-
-// Appends to |bbOut| a dirty call OnTrace(trace_info). |trace_info| must not
-// be NULL.
-static void ts_instrument_trace_entry(IRSB *bbOut, TraceInfo *trace_info) {
-   CHECK(trace_info);
-   IRExpr **call_args = mkIRExprVec_1(mkIRExpr_HWord((HWord)trace_info));
-   IRDirty* call = unsafeIRDirty_0_N( 1 /*regparms*/,
-                                      (HChar*)"OnTrace",
-                                      VG_(fnptr_to_fnentry)((void*)OnTrace),
-                                      call_args);
-   addStmtToIRSB( bbOut, IRStmt_Dirty(call));
-}
-
-// Instruments the jump that ends a superblock so that the shadow call stack
-// stays consistent.
-//   sbOut    - superblock receiving the instrumentation.
-//   next     - jump target expression, passed to the callback as 2nd argument.
-//   jumpkind - only Ijk_Call gets an evh__rtn_call_* callback; on ARM every
-//              other jump kind gets an evh__delete_frame callback instead.
-//   layout, hWordTy - used to read the guest SP; gWordTy is unused.
-static void ts_instrument_final_jump (
-                                /*MOD*/IRSB* sbOut,
-                                IRExpr* next,
-                                IRJumpKind jumpkind,
-                                VexGuestLayout* layout,
-                                IRType gWordTy, IRType hWordTy ) {
-
-#ifndef VGP_arm_linux
-  // On non-ARM systems we instrument only function calls.
-  if (jumpkind != Ijk_Call) return;
-#else
-  if (jumpkind != Ijk_Call) {
-    // On an ARM system a non-call jump may possibly exit a function.
-    IRTemp sp_post_call_insn
-        = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
-    IRExpr **args = mkIRExprVec_2(
-        IRExpr_RdTmp(sp_post_call_insn),
-        next
-        );
-    IRDirty* di = unsafeIRDirty_0_N(
-        2/*regparms*/,
-        (char*)"evh__delete_frame",
-        VG_(fnptr_to_fnentry)((void*) &evh__delete_frame ),
-        args );
-    addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
-    return;  // do not fall through
-  }
-#endif
-  {
-    // Default callback, used when the call target is not a constant.
-    const char *fn_name = "evh__rtn_call_ignore_unknown";
-    void *fn = (void*)&evh__rtn_call_ignore_unknown;
-    // Instrument the call instruction to keep the shadow stack consistent.
-    IRTemp sp_post_call_insn
-        = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
-    IRExpr **args = mkIRExprVec_2(
-        IRExpr_RdTmp(sp_post_call_insn),
-        next
-        );
-    // For a constant target the ignore decision is made here, at
-    // instrumentation time, and encoded in the choice of callback.
-    if (next->tag == Iex_Const) {
-      IRConst *con = next->Iex.Const.con;
-      uintptr_t target = 0;
-      if (con->tag == Ico_U32 || con->tag == Ico_U64) {
-        target = con->tag == Ico_U32 ? con->Ico.U32 : con->Ico.U64;
-        bool ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target);
-        if (ignore) {
-          fn_name = "evh__rtn_call_ignore_yes";
-          fn = (void*)&evh__rtn_call_ignore_yes;
-        } else {
-          fn_name = "evh__rtn_call_ignore_no";
-          fn = (void*)&evh__rtn_call_ignore_no;
-        }
-      }
-    }
-    IRDirty* di = unsafeIRDirty_0_N(
-        2/*regparms*/,
-        (char*)fn_name,
-        VG_(fnptr_to_fnentry)(fn),
-        args );
-    addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
-  }
-}
-
-// Emits IR into |bbOut| that performs tleb[idx] = x, where |tleb_temp| holds
-// the base address of the event buffer. The slot address is computed into a
-// fresh temporary of type |tyAddr| and |x| is stored there little-endian.
-static void gen_store_to_tleb(IRSB *bbOut, IRTemp tleb_temp,
-                              uintptr_t idx, IRExpr *x, IRType tyAddr) {
-  CHECK(tleb_temp != IRTemp_INVALID);
-
-  // Byte offset of the slot; the add width follows the host pointer size.
-  IRExpr *byte_offset = mkIRExpr_HWord(idx * sizeof(uintptr_t));
-  const IROp add_op = sizeof(uintptr_t) == 8 ? Iop_Add64 : Iop_Add32;
-  IRExpr *slot_addr_expr =
-      IRExpr_Binop(add_op, IRExpr_RdTmp(tleb_temp), byte_offset);
-
-  IRTemp slot_addr = newIRTemp(bbOut->tyenv, tyAddr);
-  addStmtToIRSB(bbOut, IRStmt_WrTmp(slot_addr, slot_addr_expr));
-  addStmtToIRSB(bbOut, IRStmt_Store(Iend_LE, IRExpr_RdTmp(slot_addr), x));
-}
-
-// Instruments one memory access of the superblock being translated.
-//   trace_info - NULL during a pass that only counts accesses; otherwise the
-//                trace whose MopInfo slot *trace_idx is filled in.
-//   tleb_temp  - temporary holding the tleb base address.
-//   pc         - address of the guest instruction performing the access.
-//   trace_idx  - in/out index of the next free mop slot.
-//   bbOut      - superblock receiving the generated statements.
-//   st         - statement the access belongs to.
-//   addr, szB, isStore - address expression, size and direction of the access.
-//   dtor_head  - enables the "identical store" check for stores whose data
-//                has the same type as the address.
-//   hWordTy_szB - host word size in bytes (must be 4 or 8).
-static void instrument_mem_access ( TraceInfo *trace_info,
-                                    IRTemp tleb_temp,
-                                    uintptr_t pc,
-                                    size_t  *trace_idx,
-                                    IRSB*   bbOut,
-                                    IRStmt* st,
-                                    IRExpr* addr,
-                                    Int     szB,
-                                    Bool    isStore,
-                                    Bool    dtor_head,
-                                    Int     hWordTy_szB ) {
-  IRType   tyAddr   = Ity_INVALID;
-
-  tl_assert(isIRAtom(addr));
-  tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
-
-  tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
-  tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
-
-  if (szB == 28) {
-    // Ignore weird-sized accesses for now.
-    // See http://code.google.com/p/data-race-test/issues/detail?id=36
-    return;
-  }
-
-  bool check_ident_store = false;
-
-  if (st->tag == Ist_Store && dtor_head && 
-      typeOfIRExpr(bbOut->tyenv, st->Ist.Store.data) == tyAddr) {
-    check_ident_store = true;
-  }
-
-  size_t next_trace_idx = *trace_idx + 1;
-
-  // Accesses beyond the tleb capacity are silently left uninstrumented.
-  // NOTE(review): the inner "==" test can never hold inside the outer ">"
-  // test, so the "too many mops" report below is unreachable.
-  if (next_trace_idx > kMaxMopsPerTrace) {
-    if (next_trace_idx == kMaxMopsPerTrace) {
-      Report("INFO: too many mops in trace: %p %s\n", pc,
-             PcToRtnName(pc, true).c_str());
-    }
-    return;
-  }
-
-  if (!trace_info) {
-    // not instrumenting yet.
-    *trace_idx = next_trace_idx;
-    return;
-  }
-
-  IRExpr *expr_to_store = NULL;
-
-  if (check_ident_store) {
-    // The "+ is_64" arithmetic below relies on the 64-bit variant of each
-    // IROp directly following its 32-bit variant in the enum.
-    int is_64 = (sizeof(void*) == 8);
-    // generate expression (*addr == new_value ? 0 : addr):
-
-    // old_value = *addr
-    IRExpr *addr_load_expr = IRExpr_Load(Iend_LE, tyAddr, addr);
-    IRTemp star_addr = newIRTemp(bbOut->tyenv, tyAddr);
-    IRStmt *star_addr_stmt = IRStmt_WrTmp(star_addr, addr_load_expr);
-    addStmtToIRSB(bbOut, star_addr_stmt);
-    // sub = (old_value - new_value)
-    IRTemp sub = newIRTemp(bbOut->tyenv, tyAddr);
-    IRExpr *sub_expr = IRExpr_Binop((IROp)(Iop_Sub32 + is_64),
-                                    IRExpr_RdTmp(star_addr),
-                                    st->Ist.Store.data);
-    IRStmt *sub_stmt = IRStmt_WrTmp(sub, sub_expr);
-    addStmtToIRSB(bbOut, sub_stmt);
-    // mask = (sub==0) ? 0 : -1
-    IRTemp mask = newIRTemp(bbOut->tyenv, tyAddr);
-    IRExpr *mask_expr = IRExpr_Unop((IROp)(Iop_CmpwNEZ32 + is_64),
-                                    IRExpr_RdTmp(sub));
-    IRStmt *mask_stmt = IRStmt_WrTmp(mask, mask_expr);
-    addStmtToIRSB(bbOut, mask_stmt);
-
-    // res = mask & addr
-    IRTemp and_tmp = newIRTemp(bbOut->tyenv, tyAddr);
-    IRExpr *and_expr = IRExpr_Binop((IROp)(Iop_And32 + is_64),
-                                    IRExpr_RdTmp(mask), addr);
-    IRStmt *and_stmt = IRStmt_WrTmp(and_tmp, and_expr);
-    addStmtToIRSB(bbOut, and_stmt);
-
-    expr_to_store = IRExpr_RdTmp(and_tmp);
-  } else {
-    expr_to_store = addr;
-  }
-
-  // OnMop: g_cur_tleb[idx] = expr_to_store
-  gen_store_to_tleb(bbOut, tleb_temp, *trace_idx, expr_to_store, tyAddr);
-  // Create a mop {pc, size, is_write}
-  MopInfo *mop = trace_info->GetMop(*trace_idx);
-  new (mop) MopInfo(pc, szB, isStore, false);
-  (*trace_idx)++;
-
-  CHECK(*trace_idx == next_trace_idx);
-}
-
-// Examines one IR statement of |bbIn| and, if it accesses memory, calls
-// instrument_mem_access() for it.
-//   st         - statement to examine.
-//   bbIn/bbOut - source superblock (used for typing) and output superblock.
-//   hWordTy    - host word type.
-//   trace_info, tleb_temp, idx, dtor_head - forwarded unchanged to
-//                instrument_mem_access().
-//   cur_pc     - in/out: updated from each IMark and passed as the access PC.
-// An unknown statement kind or memory bus event is printed and asserts.
-void instrument_statement (IRStmt* st, IRSB* bbIn, IRSB* bbOut, IRType hWordTy,
-                           TraceInfo *trace_info, IRTemp tleb_temp,
-                           size_t *idx, uintptr_t *cur_pc, bool dtor_head) {
-  switch (st->tag) {
-    case Ist_NoOp:
-    case Ist_AbiHint:
-    case Ist_Put:
-    case Ist_PutI:
-    case Ist_Exit:
-      /* None of these can contain any memory references. */
-      break;
-
-    case Ist_IMark:
-      *cur_pc = st->Ist.IMark.addr;
-      break;
-
-    case Ist_MBE:
-      //instrument_memory_bus_event( bbOut, st->Ist.MBE.event );
-      switch (st->Ist.MBE.event) {
-        case Imbe_Fence:
-          break; /* not interesting */
-        default:
-          ppIRStmt(st);
-          tl_assert(0);
-      }
-      break;
-
-    case Ist_CAS:
-      /* Compare-and-swap statements are not instrumented. */
-      break;
-
-    case Ist_Store:
-      instrument_mem_access(trace_info, tleb_temp, *cur_pc, idx,
-        bbOut, st,
-        st->Ist.Store.addr,
-        sizeofIRType(typeOfIRExpr(bbIn->tyenv, st->Ist.Store.data)),
-        True/*isStore*/, dtor_head,
-        sizeofIRType(hWordTy)
-      );
-      break;
-
-    case Ist_WrTmp: {
-      /* Only a temporary assigned from a load is a memory access. */
-      IRExpr* data = st->Ist.WrTmp.data;
-      if (data->tag == Iex_Load) {
-        instrument_mem_access(trace_info, tleb_temp, *cur_pc, idx,
-            bbOut, st,
-            data->Iex.Load.addr,
-            sizeofIRType(data->Iex.Load.ty),
-            False/*!isStore*/, dtor_head,
-            sizeofIRType(hWordTy)
-            );
-      }
-      break;
-    }
-
-    case Ist_LLSC: {
-      /* Ignore load-linked's and store-conditionals. */
-      break;
-    }
-
-    case Ist_Dirty: {
-      Int      dataSize;
-      IRDirty* d = st->Ist.Dirty.details;
-      if (d->mFx != Ifx_None) {
-        /* This dirty helper accesses memory.  Collect the
-           details. */
-        tl_assert(d->mAddr != NULL);
-        tl_assert(d->mSize != 0);
-        dataSize = d->mSize;
-        /* Ifx_Modify is recorded as both a read and a write. */
-        if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
-          instrument_mem_access(trace_info, tleb_temp, *cur_pc, idx,
-            bbOut, st, d->mAddr, dataSize, False/*!isStore*/, dtor_head,
-            sizeofIRType(hWordTy)
-          );
-        }
-        if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
-          instrument_mem_access(trace_info, tleb_temp, *cur_pc, idx,
-            bbOut, st, d->mAddr, dataSize, True/*isStore*/, dtor_head,
-            sizeofIRType(hWordTy)
-          );
-        }
-      } else {
-        tl_assert(d->mAddr == NULL);
-        tl_assert(d->mSize == 0);
-      }
-      break;
-    }
-
-    default:
-      ppIRStmt(st);
-      tl_assert(0);
-  } /* switch (st->tag) */
-}
-
-static IRSB* ts_instrument ( VgCallbackClosure* closure,
-                             IRSB* bbIn,
-                             VexGuestLayout* layout,
-                             VexGuestExtents* vge,
-                             IRType gWordTy, IRType hWordTy) {
-  if (G_flags->dry_run >= 2) return bbIn;
-  Int   i;
-  IRSB* bbOut;
-  uintptr_t pc = closure->readdr;
-
-  char objname[kBuffSize];
-  if (VG_(get_objname)(pc, (Char*)objname, kBuffSize)) {
-    if (StringMatch("*/ld-2*", objname)) {
-      // we want to completely ignore ld-so.
-      return bbIn;
-    }
-  }
-
-  bool instrument_memory = ThreadSanitizerWantToInstrumentSblock(pc);
-
-  if (gWordTy != hWordTy) {
-    /* We don't currently support this case. */
-    VG_(tool_panic)((Char*)"host/guest word size mismatch");
-  }
-
-  /* Set up BB */
-  bbOut           = emptyIRSB();
-  bbOut->tyenv    = deepCopyIRTypeEnv(bbIn->tyenv);
-  bbOut->next     = deepCopyIRExpr(bbIn->next);
-  bbOut->jumpkind = bbIn->jumpkind;
-
-  // Copy verbatim any IR preamble preceding the first IMark
-  i = 0;
-  while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
-    addStmtToIRSB( bbOut, bbIn->stmts[i] );
-    i++;
-  }
-  int first = i;
-  size_t n_mops = 0;
-  uintptr_t cur_pc = pc;
-
-  IRTemp tleb_temp = IRTemp_INVALID;
-
-  bool dtor_head = false;
-  char buff[1000];
-  // get_fnname_w_offset returns demangled name with optional "+offset" prefix.
-  // If we have "::~" and don't have "+", this SB is the first in this dtor.
-  // We do all this stuff to avoid benign races on vptr:
-  // http://code.google.com/p/data-race-test/wiki/PopularDataRaces#Data_race_on_vptr
-  if (VG_(get_fnname_w_offset)(pc, (Char*)buff, sizeof(buff)) &&
-      VG_(strstr)((Char*)buff, (Char*)"::~") != NULL) {
-    char *offset_str = (char*)VG_(strchr)((Char*)buff, '+');
-    if (offset_str == NULL) {
-      // we are in the first BB of DTOR.
-      dtor_head = true;
-    } else {
-      // We are not in the first BB.
-      // On x86_64 (it seems like) the vfptr is updated only in the first BB.
-      // On x86 with -fPIC, the vfptr may be updated in the second BB
-      // (because -fPIC adds a call which splits the first BB).
-      // See http://code.google.com/p/chromium/issues/detail?id=61199
-#ifdef VGA_x86
-      char *end;
-      size_t offset = my_strtol(offset_str + 1, &end, 10);
-      if (offset <= 32) {
-        dtor_head = true;
-      }
-#endif
-    }
-  }
-
-
-  uintptr_t instrument_pc = 0; // if != 0, instrument only the instruction at this address
-  if (g_race_verifier_active) {
-    uintptr_t min_pc = vge->base[0];
-    uintptr_t max_pc = min_pc + vge->len[0];
-    bool verify_trace = RaceVerifierGetAddresses(min_pc, max_pc, &instrument_pc);
-    if (!verify_trace)
-      instrument_memory = false;
-  }
-
-  // count mops
-  if (instrument_memory) {
-    for (i = first; i < bbIn->stmts_used; i++) {
-      IRStmt* st = bbIn->stmts[i];
-      tl_assert(st);
-      tl_assert(isFlatIRStmt(st));
-      if (st->tag == Ist_IMark)
-        cur_pc = st->Ist.IMark.addr;
-      if (!instrument_pc || cur_pc == instrument_pc)
-        instrument_statement(st, bbIn, bbOut, hWordTy,
-            NULL, tleb_temp, &n_mops, &cur_pc, dtor_head);
-    } /* iterate over bbIn->stmts */
-  }
-  TraceInfo *trace_info = NULL;
-  if (n_mops > 0) {
-    trace_info = TraceInfo::NewTraceInfo(n_mops, pc);
-  }
-  size_t n_mops_done = 0;
-  bool need_to_insert_on_trace = n_mops > 0 || g_race_verifier_active;
-  // instrument mops and copy the rest of BB to the new one.
-  for (i = first; i < bbIn->stmts_used; i++) {
-    IRStmt* st = bbIn->stmts[i];
-    tl_assert(st);
-    tl_assert(isFlatIRStmt(st));
-    if (st->tag != Ist_IMark && need_to_insert_on_trace) {
-      if (g_race_verifier_active) {
-        ts_instrument_trace_entry_verify(bbOut, layout, trace_info,
-            closure->readdr);
-      } else {
-        ts_instrument_trace_entry(bbOut, trace_info);
-      }
-      need_to_insert_on_trace = false;
-      // Generate temp for *g_cur_tleb.
-      IRType   tyAddr = sizeof(uintptr_t) == 8 ?  Ity_I64 : Ity_I32;
-      IRExpr *tleb_ptr_expr = mkIRExpr_HWord((HWord)&g_cur_tleb);
-      IRExpr *tleb_expr = IRExpr_Load(Iend_LE, tyAddr, tleb_ptr_expr);
-      tleb_temp = newIRTemp(bbOut->tyenv, tyAddr);
-      IRStmt *stmt = IRStmt_WrTmp(tleb_temp, tleb_expr);
-      addStmtToIRSB(bbOut, stmt);
-    }
-    if (instrument_memory) {
-      if (st->tag == Ist_IMark)
-        cur_pc = st->Ist.IMark.addr;
-      if (!instrument_pc || cur_pc == instrument_pc)
-        instrument_statement(st, bbIn, bbOut, hWordTy,
-            trace_info, tleb_temp, &n_mops_done, &cur_pc, dtor_head);
-    }
-    addStmtToIRSB( bbOut, st );
-  } /* iterate over bbIn->stmts */
-  CHECK(n_mops == n_mops_done);
-  if (!g_race_verifier_active)
-    ts_instrument_final_jump(bbOut, bbIn->next, bbIn->jumpkind, layout, gWordTy, hWordTy);
-  return bbOut;
-}
-
-extern "C"
-void ts_pre_clo_init(void) {
-  VG_(details_name)            ((Char*)"ThreadSanitizer");
-  VG_(details_version)         ((Char*)NULL);
-  VG_(details_description)     ((Char*)"a data race detector");
-  VG_(details_copyright_author)(
-      (Char*)"Copyright (C) 2008-2010, and GNU GPL'd, by Google Inc.");
-  VG_(details_bug_reports_to)  ((Char*)"data-race-test@googlegroups.com");
-
-  VG_(basic_tool_funcs)        (ts_post_clo_init,
-                                ts_instrument,
-                                ts_fini);
-
-  VG_(needs_client_requests)     (ts_handle_client_request);
-
-  VG_(needs_command_line_options)(ts_process_cmd_line_option,
-                                  ts_print_usage,
-                                  ts_print_debug_usage);
-   VG_(track_pre_thread_ll_create)( evh__pre_thread_ll_create );
-   VG_(track_pre_thread_ll_exit)  ( evh__pre_thread_ll_exit );
-
-   if (!g_race_verifier_active) {
-     VG_(track_workq_task_start)( evh__pre_workq_task_start );
-     VG_(track_pre_thread_first_insn)( evh__pre_thread_first_insn );
-   }
-
-   VG_(clo_vex_control).iropt_unroll_thresh = 0;
-   VG_(clo_vex_control).guest_chase_thresh = 0;
-
-   VG_(track_pre_deliver_signal) (&SignalIn);
-   VG_(track_post_deliver_signal)(&SignalOut);
-
-   VG_(track_start_client_code)( OnStartClientCode );
-}
-
-VG_DETERMINE_INTERFACE_VERSION(ts_pre_clo_init)
-
-// {{{1 end
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/ts_valgrind.h b/tsan/ts_valgrind.h
deleted file mode 100644
index 54bb150..0000000
--- a/tsan/ts_valgrind.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-  This file is part of ThreadSanitizer, a dynamic data race detector 
-  based on Valgrind.
-
-  Copyright (C) 2008-2009 Google Inc
-     opensource@google.com 
-
-  This program is free software; you can redistribute it and/or
-  modify it under the terms of the GNU General Public License as
-  published by the Free Software Foundation; either version 2 of the
-  License, or (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-  02111-1307, USA.
-
-  The GNU General Public License is contained in the file COPYING.
-*/
-
-// Author: Konstantin Serebryany.
-// Note: the rest of ThreadSanitizer is published under the BSD license.
-
-#ifndef TS_VALGRIND_H_
-#define TS_VALGRIND_H_
-
-#include <stdint.h>
-extern "C" {
-#include "pub_tool_basics.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_mallocfree.h"
-#include "pub_tool_libcprint.h"
-#include "pub_tool_libcfile.h"
-#include "pub_tool_libcproc.h"
-#include "pub_tool_vki.h"
-#include "pub_tool_threadstate.h"
-#include "pub_tool_errormgr.h"
-#include "pub_tool_options.h"
-#include "pub_tool_machine.h"
-#include "pub_tool_debuginfo.h"
-#include "pub_tool_seqmatch.h"
-#include "pub_tool_tooliface.h"
-#include "pub_tool_options.h"
-} // extern "C"
-#if defined(VGP_arm_linux)
-// A hacky trick to disable the inclusion of bits/string3.h on ARM.
-// TODO(glider): this may be specific to Ubuntu 9.10 gcc configuration.
-#define __USE_FORTIFY_LEVEL 0
-#endif
-#endif //  TS_VALGRIND_H_
-// {{{1 end
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/ts_valgrind_client_requests.h b/tsan/ts_valgrind_client_requests.h
deleted file mode 100644
index 99fbfb0..0000000
--- a/tsan/ts_valgrind_client_requests.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Kostya Serebryany
- */
-
-/* This file lists ThreadSanitizer/Valgrind client requests.
-   See ts_valgrind.cc for details.
-   This file may need to be included into source files
-   outside of ThreadSanitizer, hence the BSD license.
-*/
-
-#ifndef TS_VALGRIND_CLIENT_REQUESTS_H_
-#define TS_VALGRIND_CLIENT_REQUESTS_H_
-
-#include "valgrind.h"
-
-/*
- * WARNING: The following enum defines the ThreadSanitizer ABI.
- * Existing elements should never be changed, new elements should be added to
- * the end.
- */
-enum {
-  TSREQ_NOOP = VG_USERREQ_TOOL_BASE('T', 'S'),
-  TSREQ_CLEAN_MEMORY,
-  TSREQ_MAIN_IN,
-  TSREQ_MAIN_OUT,
-  TSREQ_MALLOC,
-  TSREQ_FREE,
-  TSREQ_MMAP,
-  TSREQ_MUNMAP,
-  TSREQ_BENIGN_RACE,
-  TSREQ_EXPECT_RACE,
-  TSREQ_PCQ_CREATE,
-  TSREQ_PCQ_DESTROY,
-  TSREQ_PCQ_PUT,
-  TSREQ_PCQ_GET,
-  TSREQ_TRACE_MEM,
-  TSREQ_MUTEX_IS_USED_AS_CONDVAR,
-  TSREQ_IGNORE_READS_BEGIN,
-  TSREQ_IGNORE_READS_END,
-  TSREQ_IGNORE_WRITES_BEGIN,
-  TSREQ_IGNORE_WRITES_END,
-  TSREQ_SET_THREAD_NAME,
-  TSREQ_IGNORE_ALL_ACCESSES_BEGIN,
-  TSREQ_IGNORE_ALL_ACCESSES_END,
-  TSREQ_IGNORE_ALL_SYNC_BEGIN,
-  TSREQ_IGNORE_ALL_SYNC_END,
-  TSREQ_GLOBAL_IGNORE_ON,
-  TSREQ_GLOBAL_IGNORE_OFF,
-  TSREQ_PUBLISH_MEMORY_RANGE,
-  TSREQ_UNPUBLISH_MEMORY_RANGE,
-  TSREQ_PRINT_MEMORY_USAGE,
-  TSREQ_PRINT_STATS,
-  TSREQ_RESET_STATS,
-  TSREQ_SET_MY_PTHREAD_T,
-  TSREQ_THR_STACK_TOP,
-  TSREQ_SET_STACKTOP_STACKSIZE,
-  TSREQ_PTH_API_ERROR,
-  TSREQ_PTHREAD_JOIN_POST,
-  TSREQ_PTHREAD_RWLOCK_CREATE_POST,
-  TSREQ_PTHREAD_RWLOCK_DESTROY_PRE,
-  TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-  TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE,
-  TSREQ_PTHREAD_SPIN_LOCK_INIT_OR_UNLOCK,
-  TSREQ_POSIX_SEM_INIT_POST,
-  TSREQ_POSIX_SEM_DESTROY_PRE,
-  TSREQ_SIGNAL,
-  TSREQ_WAIT,
-  TSREQ_CYCLIC_BARRIER_INIT,
-  TSREQ_CYCLIC_BARRIER_WAIT_BEFORE,
-  TSREQ_CYCLIC_BARRIER_WAIT_AFTER,
-  TSREQ_GET_MY_SEGMENT,
-  TSREQ_GET_THREAD_ID,
-  TSREQ_GET_VG_THREAD_ID,
-  TSREQ_GET_SEGMENT_ID,
-  TSREQ_THREAD_SANITIZER_QUERY,
-  TSREQ_FLUSH_STATE,
-  TSREQ_MUTEX_IS_NOT_PHB,  // The opposite of TSREQ_MUTEX_IS_USED_AS_CONDVAR.
-  TSREQ_FLUSH_EXPECTED_RACES
-};
-#endif  // TS_VALGRIND_CLIENT_REQUESTS_H_
-// end. {{{1
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/ts_valgrind_intercepts.c b/tsan/ts_valgrind_intercepts.c
deleted file mode 100644
index 3718b0f..0000000
--- a/tsan/ts_valgrind_intercepts.c
+++ /dev/null
@@ -1,2828 +0,0 @@
-/*
-  This file is part of ThreadSanitizer, a dynamic data race detector
-  based on Valgrind.
-
-  Copyright (C) 2008-2009 Google Inc
-     opensource@google.com
-  Copyright (C) 2007-2008 OpenWorks LLP
-      info@open-works.co.uk
-
-  This program is free software; you can redistribute it and/or
-  modify it under the terms of the GNU General Public License as
-  published by the Free Software Foundation; either version 2 of the
-  License, or (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-  02111-1307, USA.
-
-  The GNU General Public License is contained in the file COPYING.
-*/
-
-// Author: Konstantin Serebryany.
-// Parts of the code in this file are derived from Helgrind,
-// a data race detector written by Julian Seward.
-// Note that the rest of ThreadSanitizer code is not derived from Helgrind
-// and is published under the BSD license.
-
-#define _GNU_SOURCE 1
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <errno.h>
-#include <pthread.h>
-#include <fcntl.h>  // O_CREAT
-#include <unistd.h> // F_LOCK
-
-#include "valgrind.h"
-#include "pub_tool_basics.h"
-#include "pub_tool_redir.h"
-#include "pub_tool_threadstate.h"
-
-#define NOINLINE __attribute__ ((noinline))
-
-#include "ts_valgrind_client_requests.h"
-
-// When replacing a function in valgrind, the replacement code
-// is instrumented, so we just don't touch reads/writes in replacement
-// functions.
-#define EXTRA_REPLACE_PARAMS
-#define EXTRA_REPLACE_ARGS
-#define REPORT_READ_RANGE(x, size)
-#define REPORT_WRITE_RANGE(x, size)
-#include "ts_replace.h"
-
-#define TRACE_PTH_FNS 0
-#define TRACE_ANN_FNS 0
-
-
-//----------- Basic stuff --------------------------- {{{1
-
-static inline int VALGRIND_TS_THREAD_ID(void) {
-  unsigned int _qzz_res;
-  VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 ,
-                             TSREQ_GET_THREAD_ID,
-                             0, 0, 0, 0, 0);
-  return _qzz_res;
-}
-
-static inline int VALGRIND_VG_THREAD_ID(void) {
-  unsigned int _qzz_res;
-  VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 ,
-                             TSREQ_GET_VG_THREAD_ID,
-                             0, 0, 0, 0, 0);
-  return _qzz_res;
-}
-
-static inline int  VALGRIND_TS_SEGMENT_ID(void) {
-  unsigned int _qzz_res;
-  VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 ,
-                             TSREQ_GET_SEGMENT_ID,
-                             0, 0, 0, 0, 0);
-  return _qzz_res;
-}
-
-#define PTH_FUNC(ret_ty, f, args...) \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(VG_Z_LIBPTHREAD_SONAME,f)(args); \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(VG_Z_LIBPTHREAD_SONAME,f)(args)
-
-#define NONE_FUNC(ret_ty, f, args...) \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(NONE,f)(args); \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(NONE,f)(args)
-
-#define LIBC_FUNC(ret_ty, f, args...) \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(VG_Z_LIBC_SONAME,f)(args); \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(VG_Z_LIBC_SONAME,f)(args)
-
-// libstdcZpZpZa = libstdc++
-#define LIBSTDCXX_FUNC(ret_ty, f, args...) \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(VG_Z_LIBSTDCXX_SONAME,f)(args); \
-   ret_ty I_WRAP_SONAME_FNNAME_ZZ(VG_Z_LIBSTDCXX_SONAME,f)(args)
-
-
-// Do a client request.  This is a macro rather than a function
-// so as to avoid having an extra function in the stack trace.
-
-#define DO_CREQ_v_v(_creqF)                              \
-   do {                                                  \
-      Word _unused_res;                                  \
-      VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0,         \
-                                 (_creqF),               \
-                                 0,0,0,0,0);             \
-   } while (0)
-
-#define DO_CREQ_v_W(_creqF, _ty1F,_arg1F)                \
-   do {                                                  \
-      Word _unused_res, _arg1;                           \
-      assert(sizeof(_ty1F) == sizeof(Word));             \
-      _arg1 = (Word)(_arg1F);                            \
-      VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0,         \
-                                 (_creqF),               \
-                                 _arg1, 0,0,0,0);        \
-   } while (0)
-
-#define DO_CREQ_v_WW(_creqF, _ty1F,_arg1F, _ty2F,_arg2F) \
-   do {                                                  \
-      Word _unused_res, _arg1, _arg2;                    \
-      assert(sizeof(_ty1F) == sizeof(Word));             \
-      assert(sizeof(_ty2F) == sizeof(Word));             \
-      _arg1 = (Word)(_arg1F);                            \
-      _arg2 = (Word)(_arg2F);                            \
-      VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0,         \
-                                 (_creqF),               \
-                                 _arg1,_arg2,0,0,0);     \
-   } while (0)
-
-#define DO_CREQ_W_WW(_resF, _creqF, _ty1F,_arg1F, _ty2F,_arg2F) \
-   do {                                                  \
-      Word _res, _arg1, _arg2;                           \
-      assert(sizeof(_ty1F) == sizeof(Word));             \
-      assert(sizeof(_ty2F) == sizeof(Word));             \
-      _arg1 = (Word)(_arg1F);                            \
-      _arg2 = (Word)(_arg2F);                            \
-      VALGRIND_DO_CLIENT_REQUEST(_res, 2,                \
-                                 (_creqF),               \
-                                 _arg1,_arg2,0,0,0);     \
-      _resF = _res;                                      \
-   } while (0)
-
-#define DO_CREQ_v_WWW(_creqF, _ty1F,_arg1F,              \
-		      _ty2F,_arg2F, _ty3F, _arg3F)       \
-   do {                                                  \
-      Word _unused_res, _arg1, _arg2, _arg3;             \
-      assert(sizeof(_ty1F) == sizeof(Word));             \
-      assert(sizeof(_ty2F) == sizeof(Word));             \
-      assert(sizeof(_ty3F) == sizeof(Word));             \
-      _arg1 = (Word)(_arg1F);                            \
-      _arg2 = (Word)(_arg2F);                            \
-      _arg3 = (Word)(_arg3F);                            \
-      VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0,         \
-                                 (_creqF),               \
-                                 _arg1,_arg2,_arg3,0,0); \
-   } while (0)
-
-#define DO_CREQ_v_WWWW(_creqF, _ty1F,_arg1F, _ty2F,_arg2F,\
-		      _ty3F,_arg3F, _ty4F, _arg4F)       \
-   do {                                                  \
-      Word _unused_res, _arg1, _arg2, _arg3, _arg4;      \
-      assert(sizeof(_ty1F) == sizeof(Word));             \
-      assert(sizeof(_ty2F) == sizeof(Word));             \
-      assert(sizeof(_ty3F) == sizeof(Word));             \
-      assert(sizeof(_ty4F) == sizeof(Word));             \
-      _arg1 = (Word)(_arg1F);                            \
-      _arg2 = (Word)(_arg2F);                            \
-      _arg3 = (Word)(_arg3F);                            \
-      _arg4 = (Word)(_arg4F);                            \
-      VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0,         \
-                              (_creqF),                  \
-                             _arg1,_arg2,_arg3,_arg4,0); \
-   } while (0)
-
-
-
-#define DO_PthAPIerror(_fnnameF, _errF)                  \
-   do {                                                  \
-      char* _fnname = (char*)(_fnnameF);                 \
-      long  _err    = (long)(int)(_errF);                \
-      char* _errstr = lame_strerror(_err);               \
-      DO_CREQ_v_WWW(TSREQ_PTH_API_ERROR,                 \
-                    char*,_fnname,                       \
-                    long,_err, char*,_errstr);           \
-   } while (0)
-
-static inline void IGNORE_ALL_ACCESSES_BEGIN(void) {
-   DO_CREQ_v_W(TSREQ_IGNORE_ALL_ACCESSES_BEGIN,  void*, NULL);
-}
-
-static inline void IGNORE_ALL_ACCESSES_END(void) {
-   DO_CREQ_v_W(TSREQ_IGNORE_ALL_ACCESSES_END,  void*, NULL);
-}
-
-static inline void IGNORE_ALL_SYNC_BEGIN(void) {
-   DO_CREQ_v_W(TSREQ_IGNORE_ALL_SYNC_BEGIN,  void*, NULL);
-}
-
-static inline void IGNORE_ALL_SYNC_END(void) {
-   DO_CREQ_v_W(TSREQ_IGNORE_ALL_SYNC_END,  void*, NULL);
-}
-
-static inline void IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(void) {
-  IGNORE_ALL_ACCESSES_BEGIN();
-  IGNORE_ALL_SYNC_BEGIN();
-}
-
-static inline void IGNORE_ALL_ACCESSES_AND_SYNC_END(void) {
-  IGNORE_ALL_ACCESSES_END();
-  IGNORE_ALL_SYNC_END();
-}
-
-//-------------- Wrapper for main() -------- {{{1
-#define MAIN_WRAPPER_DECL \
- int I_WRAP_SONAME_FNNAME_ZU(NONE,main) (long argc, char **argv, char **env)
-
-MAIN_WRAPPER_DECL;
-MAIN_WRAPPER_DECL {
-  int ret;
-  OrigFn fn;
-  VALGRIND_GET_ORIG_FN(fn);
-  DO_CREQ_v_WW(TSREQ_MAIN_IN,  long, argc, char **, argv);
-  CALL_FN_W_WWW(ret, fn, argc, argv, env);
-  DO_CREQ_v_W(TSREQ_MAIN_OUT,  void*, ret);
-  return ret;
-}
-
-//-------------- MALLOC -------------------- {{{1
-
-// We ignore memory accesses and sync events inside malloc.
-// Accesses are ignored so that we don't spend time on them.
-// Sync events are ignored so that malloc does not create h-b arcs.
-// Currently, we ignore only Lock/Unlock events, not any other sync events.
-
-#define WRAP_MALLOC(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (SizeT n); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (SizeT n) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_W(ret, fn, n); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    DO_CREQ_v_WW(TSREQ_MALLOC,  void*, ret, long, n); \
-    return ret; \
-  }
-
-#define WRAP_CALLOC(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (SizeT n, SizeT c); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (SizeT n, SizeT c) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WW(ret, fn, n, c); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    DO_CREQ_v_WW(TSREQ_MALLOC,  void*, ret, long, n * c); \
-    return ret; \
-  }
-
-#define WRAP_REALLOC(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr, SizeT n); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr, SizeT n) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WW(ret, fn, ptr, n); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    DO_CREQ_v_WW(TSREQ_MALLOC,  void*, ret, long, n); \
-    return ret; \
-  }
-
-#define WRAP_POSIX_MEMALIGN(soname, fnname) \
-  int I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void **ptr, long a, long size);\
-  int I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void **ptr, long a, long size){\
-    OrigFn fn;\
-    int ret;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WWW(ret, fn, ptr, a, size); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    if (ret == 0) \
-      DO_CREQ_v_WW(TSREQ_MALLOC,  void*, *ptr, long, size); \
-    return ret; \
-  }
-
-#define WRAP_WORKQ_OPS(soname, fnname) \
-  int I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (int options, void* item, \
-                                              int priority);\
-  int I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (int options, void* item, \
-                                              int priority){\
-    OrigFn fn;\
-    int ret;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    CALL_FN_W_WWW(ret, fn, options, item, priority); \
-    /* Trigger only on workq_ops(QUEUE_ADD) */ \
-    if (options == 1) { \
-      DO_CREQ_v_W(TSREQ_SIGNAL, void*,item); \
-    } \
-    return ret; \
-  }
-
-WRAP_WORKQ_OPS(VG_Z_LIBC_SONAME, __workq_ops);
-
-#ifdef ANDROID
-#define OFF_T_SIZE 4
-#else
-// TODO: this is probably wrong for 32-bit code without -D_FILE_OFFSET_BITS=64
-#define OFF_T_SIZE 8
-#endif
-
-// Hacky workaround for https://bugs.kde.org/show_bug.cgi?id=228471
-// Used in mmap and lockf wrappers.
-#if VG_WORDSIZE < OFF_T_SIZE
-typedef unsigned long long OFF_T;
-#define CALL_FN_W_5WO_T(ret,fn,p1,p2,p3,p4,p5,off_t_p) CALL_FN_W_7W(ret,fn,\
-                        p1,p2,p3,p4,p5,off_t_p & 0xffffffff, off_t_p >> 32)
-#define CALL_FN_W_2WO_T(ret,fn,p1,p2,off_t_p) CALL_FN_W_WWWW(ret,fn,\
-                                 p1,p2,off_t_p & 0xffffffff, off_t_p >> 32)
-#else
-typedef long OFF_T;
-#define CALL_FN_W_5WO_T(ret,fn,p1,p2,p3,p4,p5,off_t_p) CALL_FN_W_6W(ret,fn,\
-                                                    p1,p2,p3,p4,p5,off_t_p)
-#define CALL_FN_W_2WO_T(ret,fn,p1,p2,off_t_p) CALL_FN_W_WWW(ret,fn,\
-                                                    p1,p2,off_t_p)
-#endif
-
-#define WRAP_MMAP(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr, long size, long a, \
-                                                long b, long c, OFF_T d); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr, long size, long a, \
-                                                long b, long c, OFF_T d){ \
-    void* ret;\
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_5WO_T(ret, fn, ptr, size, a, b, c, d); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    if (ret != (void*)-1) { \
-      DO_CREQ_v_WW(TSREQ_MMAP,  void*, ret, long, size); \
-    } \
-    return ret; \
-  }
-
-#define WRAP_MUNMAP(soname, fnname) \
-  int I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr, size_t size); \
-  int I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr, size_t size){ \
-    int ret;\
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WW(ret, fn, ptr, size); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    if (ret == 0) { \
-      DO_CREQ_v_WW(TSREQ_MUNMAP, void*, ptr, size_t, size); \
-    } \
-    return ret; \
-  }
-
-#define WRAP_ZONE_MALLOC(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void* zone, SizeT n); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void* zone, SizeT n) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WW(ret, fn, zone, n); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    DO_CREQ_v_WW(TSREQ_MALLOC,  void*, ret, long, n); \
-    return ret; \
-  }
-
-#define WRAP_ZONE_CALLOC(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void* zone, SizeT n, SizeT c); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void* zone, SizeT n, SizeT c) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WWW(ret, fn, zone, n, c); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    DO_CREQ_v_WW(TSREQ_MALLOC,  void*, ret, long, n * c); \
-    return ret; \
-  }
-
-#define WRAP_ZONE_REALLOC(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void* zone, void *ptr, SizeT n); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void* zone, void *ptr, SizeT n) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_W_WWW(ret, fn, zone, ptr, n); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-    DO_CREQ_v_WW(TSREQ_MALLOC, void*, ret, long, n); \
-    return ret; \
-  }
-
-
-WRAP_ZONE_MALLOC(VG_Z_LIBC_SONAME, malloc_zone_malloc);
-WRAP_ZONE_CALLOC(VG_Z_LIBC_SONAME, malloc_zone_calloc);
-WRAP_ZONE_REALLOC(VG_Z_LIBC_SONAME, malloc_zone_realloc);
-
-WRAP_MALLOC(VG_Z_LIBC_SONAME, malloc);
-WRAP_MALLOC(NONE, malloc);
-
-WRAP_MALLOC(VG_Z_LIBC_SONAME, valloc);
-WRAP_MALLOC(NONE, valloc);
-WRAP_MALLOC(VG_Z_LIBC_SONAME, pvalloc);
-WRAP_MALLOC(NONE, pvalloc);
-
-WRAP_MALLOC(NONE, _Znam);
-WRAP_MALLOC(NONE, _Znwm);
-WRAP_MALLOC(NONE, _Znaj);
-WRAP_MALLOC(NONE, _Znwj);
-WRAP_MALLOC(NONE, _ZnamRKSt9nothrow_t);
-WRAP_MALLOC(NONE, _ZnwmRKSt9nothrow_t);
-WRAP_MALLOC(NONE, _ZnajRKSt9nothrow_t);
-WRAP_MALLOC(NONE, _ZnwjRKSt9nothrow_t);
-// same for libstdc++.
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _Znam);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _Znwm);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _Znaj);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _Znwj);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _ZnamRKSt9nothrow_t);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _ZnwmRKSt9nothrow_t);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _ZnajRKSt9nothrow_t);
-WRAP_MALLOC(VG_Z_LIBSTDCXX_SONAME, _ZnwjRKSt9nothrow_t);
-
-
-WRAP_CALLOC(VG_Z_LIBC_SONAME, calloc);
-WRAP_CALLOC(NONE, calloc);
-
-WRAP_REALLOC(VG_Z_LIBC_SONAME, realloc); // TODO: handle free inside realloc
-WRAP_REALLOC(NONE, realloc); // TODO: handle free inside realloc
-WRAP_REALLOC(VG_Z_LIBC_SONAME, memalign);
-WRAP_REALLOC(NONE, memalign);
-WRAP_POSIX_MEMALIGN(VG_Z_LIBC_SONAME, posix_memalign);
-WRAP_POSIX_MEMALIGN(NONE, posix_memalign);
-
-WRAP_MMAP(VG_Z_LIBC_SONAME, mmap);
-WRAP_MMAP(NONE, mmap);
-
-WRAP_MUNMAP(VG_Z_LIBC_SONAME, munmap);
-WRAP_MUNMAP(NONE, munmap);
-
-#define WRAP_FREE(soname, fnname) \
-  void I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr); \
-  void I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *ptr) { \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    DO_CREQ_v_W(TSREQ_FREE,  void*, ptr); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_v_W(fn, ptr); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-  }
-
-
-#define WRAP_FREE_ZZ(soname, fnname) \
-  void I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) (void *ptr); \
-  void I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) (void *ptr) { \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    DO_CREQ_v_W(TSREQ_FREE,  void*, ptr); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_v_W(fn, ptr); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-  }
-
-
-#define WRAP_ZONE_FREE(soname, fnname) \
-  void I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *zone, void *ptr); \
-  void I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *zone, void *ptr) { \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    DO_CREQ_v_W(TSREQ_FREE, void*, ptr); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN(); \
-      CALL_FN_v_WW(fn, zone, ptr); \
-    IGNORE_ALL_ACCESSES_AND_SYNC_END(); \
-  }
-
-WRAP_FREE(VG_Z_LIBC_SONAME, free);
-WRAP_ZONE_FREE(VG_Z_LIBC_SONAME, malloc_zone_free);
-
-WRAP_FREE(NONE, free);
-
-WRAP_FREE(NONE, _ZdlPv);
-WRAP_FREE(NONE, _ZdaPv);
-WRAP_FREE(NONE, _ZdlPvRKSt9nothrow_t);
-WRAP_FREE(NONE, _ZdaPvRKSt9nothrow_t);
-// same for libstdc++
-WRAP_FREE(VG_Z_LIBSTDCXX_SONAME, _ZdlPv);
-WRAP_FREE(VG_Z_LIBSTDCXX_SONAME, _ZdaPv);
-WRAP_FREE(VG_Z_LIBSTDCXX_SONAME, _ZdlPvRKSt9nothrow_t);
-WRAP_FREE(VG_Z_LIBSTDCXX_SONAME, _ZdaPvRKSt9nothrow_t);
-
-// operator delete
-WRAP_FREE_ZZ(NONE, operatorZsdeleteZa);
-
-
-/* Handle tcmalloc (http://code.google.com/p/google-perftools/) */
-
-/* tc_ functions (used when tcmalloc is running in release mode) */
-WRAP_MALLOC(NONE,tc_malloc);
-WRAP_MALLOC(NONE,tc_new);
-WRAP_MALLOC(NONE,tc_new_nothrow);
-WRAP_MALLOC(NONE,tc_newarray);
-WRAP_MALLOC(NONE,tc_newarray_nothrow);
-WRAP_FREE(NONE,tc_free);
-WRAP_FREE(NONE,tc_cfree);
-WRAP_FREE(NONE,tc_delete);
-WRAP_FREE(NONE,tc_delete_nothrow);
-WRAP_FREE(NONE,tc_deletearray);
-WRAP_FREE(NONE,tc_deletearray_nothrow);
-WRAP_CALLOC(NONE,tc_calloc);
-WRAP_REALLOC(NONE,tc_realloc);
-WRAP_MALLOC(NONE,tc_valloc);
-WRAP_POSIX_MEMALIGN(NONE,tc_memalign);
-WRAP_POSIX_MEMALIGN(NONE,tc_posix_memalign);
-
-
-
-//------------ Wrappers for stdio functions ---------
-/* These functions have internal synchronization that we don't handle and get
-   lots of false positives. To fix this, we wrap these functions, touch their
-   arguments, and pass them through to the original function, ignoring all
-   memory accesses inside it. */
-
-size_t I_WRAP_SONAME_FNNAME_ZU(VG_Z_LIBC_SONAME, fwrite) (const void *ptr, size_t size, size_t nmemb, void* stream);
-size_t I_WRAP_SONAME_FNNAME_ZU(VG_Z_LIBC_SONAME, fwrite) (const void *ptr, size_t size, size_t nmemb, void* stream) {
-  size_t ret;
-  OrigFn fn;
-  ReadMemory(ptr, size * nmemb);
-  VALGRIND_GET_ORIG_FN(fn);
-  IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN();
-  CALL_FN_W_WWWW(ret, fn, ptr, size, nmemb, stream);
-  IGNORE_ALL_ACCESSES_AND_SYNC_END();
-  return ret;
-}
-
-int I_WRAP_SONAME_FNNAME_ZU(VG_Z_LIBC_SONAME, puts) (const char *s);
-int I_WRAP_SONAME_FNNAME_ZU(VG_Z_LIBC_SONAME, puts) (const char *s) {
-  int ret;
-  OrigFn fn;
-  ReadString(s);
-  VALGRIND_GET_ORIG_FN(fn);
-  IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN();
-  CALL_FN_W_W(ret, fn, s);
-  IGNORE_ALL_ACCESSES_AND_SYNC_END();
-  return ret;
-}
-
-
-//-------------- PTHREADS -------------------- {{{1
-/* A lame version of strerror which doesn't use the real libc
-   strerror_r, since using the latter just generates endless more
-   threading errors (glibc goes off and does tons of crap w.r.t.
-   locales etc) */
-static char* lame_strerror ( long err )
-{   switch (err) {
-      case EPERM:       return "EPERM: Operation not permitted";
-      case ENOENT:      return "ENOENT: No such file or directory";
-      case ESRCH:       return "ESRCH: No such process";
-      case EINTR:       return "EINTR: Interrupted system call";
-      case EBADF:       return "EBADF: Bad file number";
-      case EAGAIN:      return "EAGAIN: Try again";
-      case ENOMEM:      return "ENOMEM: Out of memory";
-      case EACCES:      return "EACCES: Permission denied";
-      case EFAULT:      return "EFAULT: Bad address";
-      case EEXIST:      return "EEXIST: File exists";
-      case EINVAL:      return "EINVAL: Invalid argument";
-      case EMFILE:      return "EMFILE: Too many open files";
-      case ENOSYS:      return "ENOSYS: Function not implemented";
-      case EOVERFLOW:   return "EOVERFLOW: Value too large "
-                               "for defined data type";
-      case EBUSY:       return "EBUSY: Device or resource busy";
-      case ETIMEDOUT:   return "ETIMEDOUT: Connection timed out";
-      case EDEADLK:     return "EDEADLK: Resource deadlock would occur";
-      case EOPNOTSUPP:  return "EOPNOTSUPP: Operation not supported on "
-                               "transport endpoint"; /* honest, guv */
-      default:          return "tc_intercepts.c: lame_strerror(): "
-                               "unhandled case -- please fix me!";
-   }
-}
-
-
-// libpthread sentry functions.
-// Darwin implementations of several libpthread functions call other functions
-// that are intercepted by ThreadSanitizer as well. To avoid reacting on those
-// functions twice the status of each Valgrind thread is stored in the
-// tid_inside_pthread_lib array and all the client requests from the inner
-// pthread functions are ignored.
-
-static int tid_inside_pthread_lib[VG_N_THREADS];
-
-// A pthread_*() function must call pthread_lib_enter() if its implementation
-// calls or is called by another pthread_*() function. The function that
-// called pthread_lib_enter() should perform client requests to ThreadSanitizer
-// iff the return value of pthread_lib_enter() is equal to 1.
-static int pthread_lib_enter(void) {
-  int ret = 1, tid;
-  IGNORE_ALL_ACCESSES_BEGIN();
-  tid = VALGRIND_VG_THREAD_ID();
-  if (tid_inside_pthread_lib[tid]++) {
-    ret = 0;
-  } else {
-    ret = 1;
-  }
-  IGNORE_ALL_ACCESSES_END();
-  return ret;
-}
-
-// A pthread_*() function must call pthread_lib_exit() iff it has called
-// pthread_lib_enter().
-static void pthread_lib_exit(void) {
-  int tid;
-  IGNORE_ALL_ACCESSES_BEGIN();
-  tid = VALGRIND_VG_THREAD_ID();
-  tid_inside_pthread_lib[tid]--;
-  IGNORE_ALL_ACCESSES_END();
-}
-
-/*----------------------------------------------------------------*/
-/*--- pthread_create, pthread_join, pthread_exit               ---*/
-/*----------------------------------------------------------------*/
-
-static void* ThreadSanitizerStartThread ( void* xargsV )
-{
-   volatile Word volatile* xargs = (volatile Word volatile*) xargsV;
-   void*(*fn)(void*) = (void*(*)(void*))xargs[0];
-   void* arg         = (void*)xargs[1];
-   pthread_t me = pthread_self();
-   size_t stacksize = 0;
-   void *stackaddr = NULL;
-   pthread_attr_t attr;
-
-   /* Tell the tool what my pthread_t is. */
-   DO_CREQ_v_W(TSREQ_SET_MY_PTHREAD_T, pthread_t,me);
-#ifdef VGO_darwin
-   /* Tell the tool what my stack size and stack top are.
-      This is Darwin-specific and works as long as ThreadSanitizerStartThread
-      is used for pthreads only.
-   */
-   stacksize = pthread_get_stacksize_np(me);
-   stackaddr = pthread_get_stackaddr_np(me);
-   DO_CREQ_v_WW(TSREQ_SET_STACKTOP_STACKSIZE, void*, stackaddr,
-                                              size_t, stacksize);
-#else
-   if (pthread_getattr_np(pthread_self(), &attr) == 0) {
-     pthread_attr_getstack(&attr, &stackaddr, &stacksize);
-     pthread_attr_destroy(&attr);
-     DO_CREQ_v_WW(TSREQ_SET_STACKTOP_STACKSIZE,
-                  void*, (char*)stackaddr + stacksize,
-                  size_t, stacksize);
-   } else {
-     /* Let the tool guess where the stack starts. */
-     DO_CREQ_v_W(TSREQ_THR_STACK_TOP, void*, &stacksize);
-   }
-#endif
-   /* allow the parent to proceed.  We can't let it proceed until
-      we're ready because (1) we need to make sure it doesn't exit and
-      hence deallocate xargs[] while we still need it, and (2) we
-      don't want either parent nor child to proceed until the tool has
-      been notified of the child's pthread_t. */
-   xargs[2] = 0;
-   /* Now we can no longer safely use xargs[]. */
-   return (void*) fn( (void*)arg );
-}
-
-static int pthread_create_WRK(pthread_t *thread, const pthread_attr_t *attr,
-                              void *(*start) (void *), void *arg)
-{
-   int    ret;
-   OrigFn fn;
-   volatile Word xargs[3];
-
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_create wrapper"); fflush(stderr);
-   }
-   xargs[0] = (Word)start;
-   xargs[1] = (Word)arg;
-   xargs[2] = 1; /* serves as a spinlock -- sigh */
-
-   IGNORE_ALL_ACCESSES_BEGIN();
-     CALL_FN_W_WWWW(ret, fn, thread,attr,ThreadSanitizerStartThread,&xargs[0]);
-   IGNORE_ALL_ACCESSES_END();
-
-   if (ret == 0) {
-      /* we have to wait for the child to notify the tool of its
-         pthread_t before continuing */
-      while (xargs[2] != 0) {
-         /* Do nothing.  We need to spin until the child writes to
-            xargs[2].  However, that can lead to starvation in the
-            child and very long delays (eg, tc19_shadowmem on
-            ppc64-linux Fedora Core 6).  So yield the cpu if we can,
-            to let the child run at the earliest available
-            opportunity. */
-         sched_yield();
-      }
-   } else {
-      DO_PthAPIerror( "pthread_create", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: pth_create -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZucreate, // pthread_create (Darwin)
-              pthread_t *thread, const pthread_attr_t *attr,
-              void *(*start) (void *), void *arg) {
-   return pthread_create_WRK(thread, attr, start, arg);
-}
-PTH_FUNC(int, pthreadZucreateZAZa, // pthread_create@* (Linux)
-              pthread_t *thread, const pthread_attr_t *attr,
-              void *(*start) (void *), void *arg) {
-   return pthread_create_WRK(thread, attr, start, arg);
-}
-
-// pthread_join
-static int pthread_join_WRK(pthread_t thread, void** value_pointer)
-{
-   int ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_join wrapper"); fflush(stderr);
-   }
-
-   CALL_FN_W_WW(ret, fn, thread,value_pointer);
-
-   /* At least with NPTL as the thread library, this is safe because
-      it is guaranteed (by NPTL) that the joiner will completely gone
-      before pthread_join (the original) returns.  See email below.*/
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_W(TSREQ_PTHREAD_JOIN_POST, pthread_t,thread);
-   } else {
-      DO_PthAPIerror( "pthread_join", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: pth_join -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZujoin, // pthread_join (Linux)
-              pthread_t thread, void** value_pointer)
-{
-  return pthread_join_WRK(thread, value_pointer);
-}
-
-PTH_FUNC(int, pthreadZujoin$Za, // pthread_join$* (Darwin)
-              pthread_t thread, void** value_pointer)
-{
-  return pthread_join_WRK(thread, value_pointer);
-}
-
-
-
-/* Behaviour of pthread_join on NPTL:
-
-Me:
-I have a question re the NPTL pthread_join implementation.
-
-  Suppose I am the thread 'stayer'.
-
-  If I call pthread_join(quitter), is it guaranteed that the
-  thread 'quitter' has really exited before pthread_join returns?
-
-  IOW, is it guaranteed that 'quitter' will not execute any further
-  instructions after pthread_join returns?
-
-I believe this is true based on the following analysis of
-glibc-2.5 sources.  However am not 100% sure and would appreciate
-confirmation.
-
-  'quitter' will be running start_thread() in nptl/pthread_create.c
-
-  The last action of start_thread() is to exit via
-  __exit_thread_inline(0), which simply does sys_exit
-  (nptl/pthread_create.c:403)
-
-  'stayer' meanwhile is waiting for lll_wait_tid (pd->tid)
-  (call at nptl/pthread_join.c:89)
-
-  As per comment at nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h:536,
-  lll_wait_tid will not return until kernel notifies via futex
-  wakeup that 'quitter' has terminated.
-
-  Hence pthread_join cannot return until 'quitter' really has
-  completely disappeared.
-
-Drepper:
->   As per comment at nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h:536,
->   lll_wait_tid will not return until kernel notifies via futex
->   wakeup that 'quitter' has terminated.
-That's the key.  The kernel resets the TID field after the thread is
-done.  No way the joiner can return before the thread is gone.
-*/
-
-#ifdef ANDROID
-// Android-specific part. Ignore some internal synchronization in bionic.
-PTH_FUNC(int, pthreadZuexit, void* retval) // pthread_exit (Android)
-{
-  int ret;
-  OrigFn fn;
-  VALGRIND_GET_ORIG_FN(fn);
-  IGNORE_ALL_ACCESSES_AND_SYNC_BEGIN();
-  CALL_FN_W_W(ret, fn, retval);
-  IGNORE_ALL_ACCESSES_AND_SYNC_END();
-  return ret;
-}
-#endif
-
-
-/*----------------------------------------------------------------*/
-/*--- pthread_mutex_t functions                                ---*/
-/*----------------------------------------------------------------*/
-
-/* Handled:   pthread_mutex_init pthread_mutex_destroy
-              pthread_mutex_lock
-              pthread_mutex_trylock
-              pthread_mutex_timedlock
-              pthread_mutex_unlock
-
-              pthread_spin_init pthread_spin_destroy
-              pthread_spin_lock
-              pthread_spin_trylock
-              pthread_spin_unlock
-*/
-
-// pthread_mutex_init
-PTH_FUNC(int, pthreadZumutexZuinit, // pthread_mutex_init
-              pthread_mutex_t *mutex,
-              pthread_mutexattr_t* attr)
-{
-   int    ret;
-   long   mbRec;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_mxinit %p", mutex); fflush(stderr);
-   }
-
-   mbRec = 0;
-   if (attr) {
-      int ty, zzz;
-      zzz = pthread_mutexattr_gettype(attr, &ty);
-      if (zzz == 0 && ty == PTHREAD_MUTEX_RECURSIVE)
-         mbRec = 1;
-   }
-
-   CALL_FN_W_WW(ret, fn, mutex,attr);
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_CREATE_POST,
-                   pthread_mutex_t*,mutex, long,mbRec);
-   } else {
-      DO_PthAPIerror( "pthread_mutex_init", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: mxinit -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-
-// pthread_mutex_destroy
-PTH_FUNC(int, pthreadZumutexZudestroy, // pthread_mutex_destroy
-              pthread_mutex_t *mutex)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_mxdestroy %p", mutex); fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_DESTROY_PRE,
-               pthread_mutex_t*,mutex);
-
-   CALL_FN_W_W(ret, fn, mutex);
-
-   if (ret != 0) {
-      DO_PthAPIerror( "pthread_mutex_destroy", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: mxdestroy -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-
-// pthread_mutex_lock
-PTH_FUNC(int, pthreadZumutexZulock, // pthread_mutex_lock
-              pthread_mutex_t *mutex)
-{
-   int    ret;
-   OrigFn fn;
-   int is_outermost;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_mxlock %p", mutex); fflush(stderr);
-   }
-
-   is_outermost = pthread_lib_enter();
-
-   CALL_FN_W_W(ret, fn, mutex);
-
-   /* There's a hole here: libpthread now knows the lock is locked,
-      but the tool doesn't, so some other thread could run and detect
-      that the lock has been acquired by someone (this thread).  Does
-      this matter?  Not sure, but I don't think so. */
-
-   if (is_outermost) {
-      if ((ret == 0 /*success*/)) {
-         DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                      pthread_mutex_t*,mutex, long, 1);
-      } else {
-         DO_PthAPIerror( "pthread_mutex_lock", ret );
-      }
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: mxlock -> %d >>\n", ret);
-   }
-   pthread_lib_exit();
-   return ret;
-}
-
-
-// pthread_mutex_trylock.  The handling needed here is very similar
-// to that for pthread_mutex_lock, except that we need to tell
-// the pre-lock creq that this is a trylock-style operation, and
-// therefore not to complain if the lock is nonrecursive and
-// already locked by this thread -- because then it'll just fail
-// immediately with EBUSY.
-static int pthread_mutex_trylock_WRK(pthread_mutex_t *mutex)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_mxtrylock %p", mutex); fflush(stderr);
-   }
-
-   CALL_FN_W_W(ret, fn, mutex);
-
-   /* There's a hole here: libpthread now knows the lock is locked,
-      but the tool doesn't, so some other thread could run and detect
-      that the lock has been acquired by someone (this thread).  Does
-      this matter?  Not sure, but I don't think so. */
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                  pthread_mutex_t*,mutex, long, 1);
-   } else {
-      if (ret != EBUSY)
-         DO_PthAPIerror( "pthread_mutex_trylock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: mxtrylock -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZumutexZutrylock, // pthread_mutex_trylock
-              pthread_mutex_t *mutex)
-{
-  return pthread_mutex_trylock_WRK(mutex);
-}
-
-
-// pthread_mutex_timedlock.  Identical logic to pthread_mutex_trylock.
-// Not implemented in Darwin pthreads.
-PTH_FUNC(int, pthreadZumutexZutimedlock, // pthread_mutex_timedlock
-   pthread_mutex_t *mutex,
-         void* timeout)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_mxtimedlock %p %p", mutex, timeout);
-      fflush(stderr);
-   }
-
-   CALL_FN_W_WW(ret, fn, mutex,timeout);
-
-   /* There's a hole here: libpthread now knows the lock is locked,
-      but the tool doesn't, so some other thread could run and detect
-      that the lock has been acquired by someone (this thread).  Does
-      this matter?  Not sure, but I don't think so. */
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                  pthread_mutex_t*,mutex, long, 1);
-   } else {
-      if (ret != ETIMEDOUT)
-         DO_PthAPIerror( "pthread_mutex_timedlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: mxtimedlock -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-
-// pthread_mutex_unlock
-PTH_FUNC(int, pthreadZumutexZuunlock, // pthread_mutex_unlock
-              pthread_mutex_t *mutex)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_mxunlk %p", mutex); fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE,
-               pthread_mutex_t*,mutex);
-
-   CALL_FN_W_W(ret, fn, mutex);
-
-   if (ret != 0 /*error*/) {
-      DO_PthAPIerror( "pthread_mutex_unlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " mxunlk -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-// pthread_spin_init
-PTH_FUNC(int, pthreadZuspinZuinit, void *lock, int pshared) {
-  int    ret;
-  OrigFn fn;
-  const char *func = "pthread_spin_init";
-  VALGRIND_GET_ORIG_FN(fn);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, "<< %s %p", func, lock);
-  }
-  CALL_FN_W_WW(ret, fn, lock, pshared);
-  if (ret == 0)  {
-    DO_CREQ_v_W(TSREQ_PTHREAD_SPIN_LOCK_INIT_OR_UNLOCK, void *, lock);
-  }
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, " -- %p >>\n", lock);
-  }
-  return ret;
-}
-
-// pthread_spin_destroy
-PTH_FUNC(int, pthreadZuspinZudestroy, void *lock) {
-  int    ret;
-  OrigFn fn;
-  const char *func = "pthread_spin_destroy";
-  VALGRIND_GET_ORIG_FN(fn);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, "<< %s %p", func, lock);
-  }
-  DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_DESTROY_PRE, void*, lock);
-  CALL_FN_W_W(ret, fn, lock);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, " -- %p >>\n", lock);
-  }
-  return ret;
-}
-
-// pthread_spin_lock
-PTH_FUNC(int, pthreadZuspinZulock, void *lock) {
-  int    ret;
-  OrigFn fn;
-  const char *func = "pthread_spin_lock";
-  VALGRIND_GET_ORIG_FN(fn);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, "<< %s %p", func, lock);
-  }
-  CALL_FN_W_W(ret, fn, lock);
-  if (ret == 0) {
-    DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST, void *, lock,
-                 long, 1 /*is_w*/);
-  }
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, " -- %p >>\n", lock);
-  }
-  return ret;
-}
-
-// pthread_spin_trylock
-PTH_FUNC(int, pthreadZuspinZutrylock, void *lock) {
-  int    ret;
-  OrigFn fn;
-  const char *func = "pthread_spin_trylock";
-  VALGRIND_GET_ORIG_FN(fn);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, "<< %s %p", func, lock);
-  }
-  CALL_FN_W_W(ret, fn, lock);
-  if (ret == 0) {
-    DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST, void *, lock,
-                 long, 1 /*is_w*/);
-  }
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, " -- %p >>\n", lock);
-  }
-  return ret;
-}
-
-// pthread_spin_unlock
-PTH_FUNC(int, pthreadZuspinZuunlock, void *lock) {
-  int    ret;
-  OrigFn fn;
-  const char *func = "pthread_spin_unlock";
-  VALGRIND_GET_ORIG_FN(fn);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, "<< %s %p", func, lock);
-  }
-  DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE, void*, lock);
-  CALL_FN_W_W(ret, fn, lock);
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, " -- %p >>\n", lock);
-  }
-  return ret;
-}
-
-
-/*----------------------------------------------------------------*/
-/*--- pthread_cond_t functions                                 ---*/
-/*----------------------------------------------------------------*/
-
-/* Handled:   pthread_cond_wait pthread_cond_timedwait
-              pthread_cond_signal pthread_cond_broadcast
-
-   Unhandled: pthread_cond_init pthread_cond_destroy
-              -- are these important?
-*/
-
-// pthread_cond_wait
-static int pthread_cond_wait_WRK(pthread_cond_t* cond, pthread_mutex_t* mutex)
-{
-  int ret;
-  OrigFn fn;
-
-  int is_outermost = pthread_lib_enter();
-  VALGRIND_GET_ORIG_FN(fn);
-
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, "<< pthread_cond_wait %p %p", cond, mutex);
-    fflush(stderr);
-  }
-  if (is_outermost) {
-    DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE, pthread_mutex_t*,mutex);
-  }
-
-  CALL_FN_W_WW(ret, fn, cond,mutex);
-
-  if (is_outermost) {
-    DO_CREQ_v_W(TSREQ_WAIT, void *,cond);
-    DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST, void *, mutex,
-                 long, 1 /*is_w*/);
-  }
-
-  if (ret != 0) {
-    DO_PthAPIerror( "pthread_cond_wait", ret );
-  }
-
-  if (TRACE_PTH_FNS) {
-    fprintf(stderr, " cowait -> %d >>\n", ret);
-  }
-
-  pthread_lib_exit();
-
-  return ret;
-}
-
-PTH_FUNC(int, pthreadZucondZuwaitZAZa, // pthread_cond_wait@*
-              pthread_cond_t* cond, pthread_mutex_t* mutex)
-{
-  return pthread_cond_wait_WRK(cond, mutex);
-}
-
-PTH_FUNC(int, pthreadZucondZuwait$Za, // pthread_cond_wait$*
-              pthread_cond_t* cond, pthread_mutex_t* mutex)
-{
-  return pthread_cond_wait_WRK(cond, mutex);
-}
-
-
-// pthread_cond_timedwait
-static int pthread_cond_timedwait_WRK(pthread_cond_t* cond,
-                                      pthread_mutex_t* mutex,
-                                      struct timespec* abstime)
-{
-   int ret;
-   OrigFn fn;
-   int is_outermost = pthread_lib_enter();
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_cond_timedwait %p %p %p",
-                      cond, mutex, abstime);
-      fflush(stderr);
-   }
-
-   /* Tell the tool a cond-wait is about to happen, so it can check
-      for bogus argument values.  In return it tells us whether it
-      thinks the mutex is valid or not. */
-   if (is_outermost) {
-     DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE, void *,mutex);
-   }
-
-
-   CALL_FN_W_WWW(ret, fn, cond,mutex,abstime);
-
-   if (is_outermost) {
-      if (ret == 0) {
-         DO_CREQ_v_W(TSREQ_WAIT, void *, cond);
-      }
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST, void *,mutex,
-                  long, 1 /*is_w*/);
-   }
-
-   if (ret != 0 && ret != ETIMEDOUT) {
-      DO_PthAPIerror( "pthread_cond_timedwait", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " cotimedwait -> %d >>\n", ret);
-   }
-
-   pthread_lib_exit();
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZucondZutimedwaitZAZa, // pthread_cond_timedwait@*
-         pthread_cond_t* cond, pthread_mutex_t* mutex,
-         struct timespec* abstime)
-{
-  return pthread_cond_timedwait_WRK(cond, mutex, abstime);
-}
-
-PTH_FUNC(int, pthreadZucondZutimedwait$Za, // pthread_cond_timedwait$*
-         pthread_cond_t* cond, pthread_mutex_t* mutex,
-         struct timespec* abstime)
-{
-  return pthread_cond_timedwait_WRK(cond, mutex, abstime);
-}
-
-PTH_FUNC(int, pthreadZucondZutimedwaitZurelativeZunp, // pthread_cond_timedwait_relative_np
-         pthread_cond_t* cond, pthread_mutex_t* mutex,
-         struct timespec* abstime)
-{
-  return pthread_cond_timedwait_WRK(cond, mutex, abstime);
-}
-
-
-// pthread_cond_signal
-static int pthread_cond_signal_WRK(pthread_cond_t* cond)
-{
-   int ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_cond_signal %p", cond);
-      fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_SIGNAL,
-               pthread_cond_t*,cond);
-
-   CALL_FN_W_W(ret, fn, cond);
-
-   if (ret != 0) {
-      DO_PthAPIerror( "pthread_cond_signal", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " cosig -> %d >>\n", ret);
-   }
-
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZucondZusignal, // pthread_cond_signal
-              pthread_cond_t* cond)
-{
-  return pthread_cond_signal_WRK(cond);
-}
-
-PTH_FUNC(int, pthreadZucondZusignalZAZa, // pthread_cond_signal@*
-              pthread_cond_t* cond)
-{
-  return pthread_cond_signal_WRK(cond);
-}
-
-// pthread_cond_broadcast
-// Note, this is pretty much identical, from a dependency-graph
-// point of view, with cond_signal, so the code is duplicated.
-// Maybe it should be commoned up.
-static int pthread_cond_broadcast_WRK(pthread_cond_t* cond)
-{
-   int ret;
-   OrigFn fn;
-   pthread_lib_enter();
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_broadcast_signal %p", cond);
-      fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_SIGNAL,
-               pthread_cond_t*,cond);
-
-   CALL_FN_W_W(ret, fn, cond);
-
-   if (ret != 0) {
-      DO_PthAPIerror( "pthread_cond_broadcast", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " cobro -> %d >>\n", ret);
-   }
-
-   pthread_lib_exit();
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZucondZubroadcast, // pthread_cond_broadcast
-              pthread_cond_t* cond)
-{
-  return pthread_cond_broadcast_WRK(cond);
-}
-
-PTH_FUNC(int, pthreadZucondZubroadcastZAZa, // pthread_cond_broadcast@*
-              pthread_cond_t* cond)
-{
-  return pthread_cond_broadcast_WRK(cond);
-}
-
-static void do_wait(void *cv) {
-  DO_CREQ_v_W(TSREQ_WAIT, void *, cv);
-}
-
-/*----------------------------------------------------------------*/
-/*--- pthread_barrier_t functions                              ---*/
-/*----------------------------------------------------------------*/
-#if defined(VGO_darwin) || defined(ANDROID)
-typedef void pthread_barrier_t;
-#endif
-// pthread_barrier_wait
-static int pthread_barrier_wait_WRK(pthread_barrier_t* b)
-{
-   int ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_barrier_wait %p", b);
-      fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_CYCLIC_BARRIER_WAIT_BEFORE, void*,b);
-   CALL_FN_W_W(ret, fn, b);
-   DO_CREQ_v_W(TSREQ_CYCLIC_BARRIER_WAIT_AFTER, void*,b);
-
-   // FIXME: handle ret
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "  pthread_barrier_wait -> %d >>\n", ret);
-   }
-
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZubarrierZuwait, // pthread_barrier_wait
-              pthread_barrier_t* b)
-{
-  return pthread_barrier_wait_WRK(b);
-}
-
-// pthread_barrier_init
-PTH_FUNC(int, pthreadZubarrierZuinit, void *b, void *a, unsigned n) {
-   int ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   DO_CREQ_v_WW(TSREQ_CYCLIC_BARRIER_INIT, void*,b, unsigned long, n);
-   CALL_FN_W_WWW(ret, fn, b, a, n);
-   return ret;
-}
-/*----------------------------------------------------------------*/
-/*--- pthread_rwlock_t functions                               ---*/
-/*----------------------------------------------------------------*/
-
-/* Handled:   pthread_rwlock_init pthread_rwlock_destroy
-              pthread_rwlock_rdlock
-              pthread_rwlock_wrlock
-              pthread_rwlock_unlock
-
-   Unhandled: pthread_rwlock_timedrdlock
-              pthread_rwlock_tryrdlock
-
-              pthread_rwlock_timedwrlock
-              pthread_rwlock_trywrlock
-*/
-
-// pthread_rwlock_init
-static int pthread_rwlock_init_WRK(pthread_rwlock_t *rwl,
-                                   pthread_rwlockattr_t* attr)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_init %p", rwl); fflush(stderr);
-   }
-
-   CALL_FN_W_WW(ret, fn, rwl,attr);
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_CREATE_POST,
-                  pthread_rwlock_t*,rwl);
-   } else {
-      DO_PthAPIerror( "pthread_rwlock_init", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_init -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZuinit, // pthread_rwlock_init
-              pthread_rwlock_t *rwl,
-              pthread_rwlockattr_t* attr)
-{
-  return pthread_rwlock_init_WRK(rwl, attr);
-}
-
-PTH_FUNC(int, pthreadZurwlockZuinit$Za, // pthread_rwlock_init$*
-              pthread_rwlock_t *rwl,
-              pthread_rwlockattr_t* attr)
-{
-  return pthread_rwlock_init_WRK(rwl, attr);
-}
-
-// pthread_rwlock_destroy
-static int pthread_rwlock_destroy_WRK( pthread_rwlock_t *rwl)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_destroy %p", rwl); fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_DESTROY_PRE,
-               pthread_rwlock_t*,rwl);
-
-   CALL_FN_W_W(ret, fn, rwl);
-
-   if (ret != 0) {
-      DO_PthAPIerror( "pthread_rwlock_destroy", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_destroy -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZudestroy, // pthread_rwlock_destroy
-              pthread_rwlock_t *rwl)
-{
-  return pthread_rwlock_destroy_WRK(rwl);
-}
-
-PTH_FUNC(int, pthreadZurwlockZudestroy$Za, // pthread_rwlock_destroy$*
-              pthread_rwlock_t *rwl)
-{
-  return pthread_rwlock_destroy_WRK(rwl);
-}
-
-
-// pthread_rwlock_wrlock
-static int pthread_rwlock_wrlock_WRK(pthread_rwlock_t* rwlock)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_wlk %p", rwlock); fflush(stderr);
-   }
-
-
-   IGNORE_ALL_SYNC_BEGIN();
-   CALL_FN_W_W(ret, fn, rwlock);
-   IGNORE_ALL_SYNC_END();
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                   pthread_rwlock_t*,rwlock, long,1/*isW*/);
-   } else {
-      DO_PthAPIerror( "pthread_rwlock_wrlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_wlk -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZuwrlock, // pthread_rwlock_wrlock
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_wrlock_WRK(rwlock);
-}
-
-PTH_FUNC(int, pthreadZurwlockZuwrlock$Za, // pthread_rwlock_wrlock$*
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_wrlock_WRK(rwlock);
-}
-
-// pthread_rwlock_rdlock
-static int pthread_rwlock_rdlock_WRK(pthread_rwlock_t* rwlock)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_rlk %p", rwlock); fflush(stderr);
-   }
-
-   IGNORE_ALL_SYNC_BEGIN();
-   CALL_FN_W_W(ret, fn, rwlock);
-   IGNORE_ALL_SYNC_END();
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                   pthread_rwlock_t*,rwlock, long,0/*!isW*/);
-   } else {
-      DO_PthAPIerror( "pthread_rwlock_rdlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_rlk -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZurdlock, // pthread_rwlock_rdlock
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_rdlock_WRK(rwlock);
-}
-
-PTH_FUNC(int, pthreadZurwlockZurdlock$Za, // pthread_rwlock_rdlock$*
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_rdlock_WRK(rwlock);
-}
-
-// pthread_rwlock_trywrlock
-static int pthread_rwlock_trywrlock_WRK(pthread_rwlock_t* rwlock)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_trywlk %p", rwlock); fflush(stderr);
-   }
-
-   IGNORE_ALL_SYNC_BEGIN();
-   CALL_FN_W_W(ret, fn, rwlock);
-   IGNORE_ALL_SYNC_END();
-
-   /* There's a hole here: libpthread now knows the lock is locked,
-      but the tool doesn't, so some other thread could run and detect
-      that the lock has been acquired by someone (this thread).  Does
-      this matter?  Not sure, but I don't think so. */
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                   pthread_rwlock_t*,rwlock, long,1/*isW*/);
-   } else {
-      if (ret != EBUSY)
-         DO_PthAPIerror( "pthread_rwlock_trywrlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_trywlk -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZutrywrlock, // pthread_rwlock_trywrlock
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_trywrlock_WRK(rwlock);
-}
-
-PTH_FUNC(int, pthreadZurwlockZutrywrlock$Za, // pthread_rwlock_trywrlock$*
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_trywrlock_WRK(rwlock);
-}
-
-// pthread_rwlock_tryrdlock
-static int pthread_rwlock_tryrdlock_WRK(pthread_rwlock_t* rwlock)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_tryrlk %p", rwlock); fflush(stderr);
-   }
-
-   IGNORE_ALL_SYNC_BEGIN();
-   CALL_FN_W_W(ret, fn, rwlock);
-   IGNORE_ALL_SYNC_END();
-
-   /* There's a hole here: libpthread now knows the lock is locked,
-      but the tool doesn't, so some other thread could run and detect
-      that the lock has been acquired by someone (this thread).  Does
-      this matter?  Not sure, but I don't think so. */
-
-   if (ret == 0 /*success*/) {
-      DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,
-                   pthread_rwlock_t*,rwlock, long,0/*!isW*/);
-   } else {
-      if (ret != EBUSY)
-         DO_PthAPIerror( "pthread_rwlock_tryrdlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_tryrlk -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZutryrdlock, // pthread_rwlock_tryrdlock
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_tryrdlock_WRK(rwlock);
-}
-
-PTH_FUNC(int, pthreadZurwlockZutryrdlock$Za, // pthread_rwlock_tryrdlock$*
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_tryrdlock_WRK(rwlock);
-}
-
-
-// pthread_rwlock_unlock
-static int pthread_rwlock_unlock_WRK(pthread_rwlock_t* rwlock)
-{
-   int    ret;
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, "<< pthread_rwl_unlk %p", rwlock); fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE,
-               pthread_rwlock_t*,rwlock);
-
-   IGNORE_ALL_SYNC_BEGIN();
-   CALL_FN_W_W(ret, fn, rwlock);
-   IGNORE_ALL_SYNC_END();
-
-   if (ret != 0 /*error*/) {
-      DO_PthAPIerror( "pthread_rwlock_unlock", ret );
-   }
-
-   if (TRACE_PTH_FNS) {
-      fprintf(stderr, " :: rwl_unlk -> %d >>\n", ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(int, pthreadZurwlockZuunlock, // pthread_rwlock_unlock
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_unlock_WRK(rwlock);
-}
-
-PTH_FUNC(int, pthreadZurwlockZuunlock$Za, // pthread_rwlock_unlock$*
-	 pthread_rwlock_t* rwlock)
-{
-  return pthread_rwlock_unlock_WRK(rwlock);
-}
-
-/*----------------------------------------------------------------*/
-/*--- POSIX semaphores                                         ---*/
-/*----------------------------------------------------------------*/
-
-#include <semaphore.h>
-
-#define TRACE_SEM_FNS 0
-
-/* Handled:
-     int sem_init(sem_t *sem, int pshared, unsigned value);
-     int sem_destroy(sem_t *sem);
-     int sem_wait(sem_t *sem);
-     int sem_post(sem_t *sem);
-     int sem_trywait(sem_t *sem);
-
-   Unhandled:
-     int sem_timedwait(sem_t *restrict sem,
-                       const struct timespec *restrict abs_timeout);
-*/
-
-/* glibc-2.5 has sem_init@@GLIBC_2.2.5 (amd64-linux)
-             and sem_init@@GLIBC_2.1 (x86-linux): match sem_init@*
-   sem_init is not implemented for Darwin. */
-PTH_FUNC(int, semZuinitZAZa, sem_t* sem, int pshared, unsigned long value)
-{
-   OrigFn fn;
-   int    ret;
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, "<< sem_init(%p,%d,%lu) ", sem,pshared,value);
-      fflush(stderr);
-   }
-
-   CALL_FN_W_WWW(ret, fn, sem,pshared,value);
-
-   if (ret == 0) {
-      DO_CREQ_v_WW(TSREQ_POSIX_SEM_INIT_POST,
-                   sem_t*, sem, unsigned long, value);
-   } else {
-      DO_PthAPIerror( "sem_init", errno );
-   }
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, " sem_init -> %d >>\n", ret);
-      fflush(stderr);
-   }
-
-   return ret;
-}
-
-
-static int sem_destroy_WRK(sem_t* sem)
-{
-   OrigFn fn;
-   int    ret;
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, "<< sem_destroy(%p) ", sem);
-      fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_POSIX_SEM_DESTROY_PRE, sem_t*, sem);
-
-   CALL_FN_W_W(ret, fn, sem);
-
-   if (ret != 0) {
-      DO_PthAPIerror( "sem_destroy", errno );
-   }
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, " sem_destroy -> %d >>\n", ret);
-      fflush(stderr);
-   }
-
-   return ret;
-}
-
-/* glibc-2.5 has sem_destroy@@GLIBC_2.2.5 (amd64-linux)
-             and sem_destroy@@GLIBC_2.1 (x86-linux); match sem_destroy@* */
-PTH_FUNC(int, semZudestroyZAZa, sem_t* sem)
-{
-  return sem_destroy_WRK(sem);
-}
-
-// Darwin has sem_destroy.
-PTH_FUNC(int, semZudestroy, sem_t* sem)
-{
-  return sem_destroy_WRK(sem);
-}
-
-/* glibc-2.5 has sem_wait (amd64-linux); match sem_wait
-             and sem_wait@@GLIBC_2.1 (x86-linux); match sem_wait@* */
-/* wait: decrement semaphore - acquire lockage */
-static int sem_wait_WRK(sem_t* sem, const char *name, int is_try)
-{
-   OrigFn fn;
-   int    ret;
-   VALGRIND_GET_ORIG_FN(fn);
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, "<< %s(%p) ", name, sem);
-      fflush(stderr);
-   }
-
-   CALL_FN_W_W(ret, fn, sem);
-
-   if (ret == 0) {
-      DO_CREQ_v_W(TSREQ_WAIT, sem_t*,sem);
-   } else {
-      if (!is_try) {
-         DO_PthAPIerror( name, errno );
-      }
-   }
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, " %s -> %d >>\n", name, ret);
-      fflush(stderr);
-   }
-
-   return ret;
-}
-PTH_FUNC(int, semZuwait, sem_t* sem) { /* sem_wait */
-   return sem_wait_WRK(sem, "sem_wait", 0);
-}
-PTH_FUNC(int, semZuwaitZAZa, sem_t* sem) { /* sem_wait@* */
-   return sem_wait_WRK(sem, "sem_wait", 0);
-}
-PTH_FUNC(int, semZuwait$Za, sem_t* sem) { /* sem_wait$* */
-   return sem_wait_WRK(sem, "sem_wait", 0);
-}
-PTH_FUNC(int, semZutrywait, sem_t* sem) { /* sem_trywait */
-   return sem_wait_WRK(sem, "sem_trywait", 1);
-}
-PTH_FUNC(int, semZutrywaitZAZa, sem_t* sem) { /* sem_trywait@* */
-   return sem_wait_WRK(sem, "sem_trywait", 1);
-}
-PTH_FUNC(int, semZutrywait$Za, sem_t* sem) { /* sem_trywait$* */
-   return sem_wait_WRK(sem, "sem_trywait", 1);
-}
-
-
-
-
-/* glibc-2.5 has sem_post (amd64-linux); match sem_post
-             and sem_post@@GLIBC_2.1 (x86-linux); match sem_post@* */
-/* post: increment semaphore - release lockage */
-static int sem_post_WRK(OrigFn fn, sem_t* sem)
-{
-   int    ret;
-
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, "<< sem_post(%p) ", sem);
-      fflush(stderr);
-   }
-
-   DO_CREQ_v_W(TSREQ_SIGNAL, sem_t*,sem);
-
-   CALL_FN_W_W(ret, fn, sem);
-
-   if (ret != 0) {
-      DO_PthAPIerror( "sem_post", errno );
-   }
-
-   if (TRACE_SEM_FNS) {
-      fprintf(stderr, " sem_post -> %d >>\n", ret);
-      fflush(stderr);
-   }
-
-   return ret;
-}
-PTH_FUNC(int, semZupost, sem_t* sem) { /* sem_post */
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   return sem_post_WRK(fn, sem);
-}
-PTH_FUNC(int, semZupostZAZa, sem_t* sem) { /* sem_post@* */
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   return sem_post_WRK(fn, sem);
-}
-PTH_FUNC(int, semZupost$Za, sem_t* sem) { /* sem_post$* */
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   return sem_post_WRK(fn, sem);
-}
-
-/* From man page:
-   sem_t *sem_open(const char *name, int oflag, ...);
-   ...
-   The oflag argument controls whether the semaphore is created or merely
-   accessed by the call to sem_open(). The following flag bits may be
-   set in oflag:
-   ...
-   If O_CREAT is set and the semaphore already exists, then O_CREAT has no
-   effect, except as noted under O_EXCL. Otherwise, sem_open() creates a
-   named semaphore. The O_CREAT flag requires a third and a fourth
-   argument: mode, which is of type mode_t, and value, which is of
-   type unsigned int. The semaphore is created with an initial value of value.
-*/
-static sem_t *sem_open_WRK(OrigFn fn,
-                           const char *name, int oflag,
-                           mode_t mode, unsigned int value) {
-
-   sem_t *ret;
-   CALL_FN_W_WWWW(ret, fn, name, oflag, mode, value);
-   if ((oflag & O_CREAT) &&
-       value > 0 &&
-       ret != SEM_FAILED) {
-     // This semaphore has been created with a non-zero value.
-     // The semaphore is initialized only on the first call to sem_open,
-     // next call will return an existing semaphore.
-     // Ideally, we need to handle it like sem_init with a non-zero value.
-     // But in such case we also need to handle sem_unlink.
-     //
-     // To avoid this complexity we simply do a SIGNAL here.
-     DO_CREQ_v_W(TSREQ_SIGNAL, sem_t*, ret);
-   }
-   return ret;
-}
-
-PTH_FUNC(sem_t *, semZuopen, const char *name, int oflag,
-         mode_t mode, unsigned int value) { /* sem_open */
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   return sem_open_WRK(fn, name, oflag, mode, value);
-}
-
-PTH_FUNC(sem_t *, semZuopenZAZa, const char *name, int oflag,
-         mode_t mode, unsigned int value) { /* sem_open@* */
-   OrigFn fn;
-   VALGRIND_GET_ORIG_FN(fn);
-   return sem_open_WRK(fn, name, oflag, mode, value);
-}
-
-
-// atexit -> exit create a h-b arc.
-static void *AtExitMagic(void) {
-  return (void*)0x12345678;
-}
-
-#define ATEXIT_BODY { \
-   OrigFn fn;\
-   long    ret;\
-   VALGRIND_GET_ORIG_FN(fn);\
-   CALL_FN_W_W(ret, fn, callback);\
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, AtExitMagic());\
-   return ret;\
-}\
-
-NONE_FUNC(long, atexit, void *callback)  ATEXIT_BODY
-LIBC_FUNC(long, atexit, void *callback)  ATEXIT_BODY
-
-#define EXIT_BODY { \
-   OrigFn fn;\
-   VALGRIND_GET_ORIG_FN(fn);\
-   do_wait(AtExitMagic());\
-   CALL_FN_v_W(fn, x);\
-}\
-
-LIBC_FUNC(void, exit, int x) EXIT_BODY
-NONE_FUNC(void, exit, int x) EXIT_BODY
-
-// socket/file IO that creates happens-before arcs.
-static void *SocketMagic(long s) {
-  return (void*)0xDEADFBAD;
-}
-
-LIBC_FUNC(int, epoll_wait, int epfd, void * events, int maxevents, int timeout) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-//   fprintf(stderr, "T%d socket epoll_wait: %d\n", VALGRIND_TS_THREAD_ID(), epfd);
-   CALL_FN_W_WWWW(ret, fn, epfd, events, maxevents, timeout);
-   o = SocketMagic(epfd);
-   do_wait(o);
-   return ret;
-}
-
-LIBC_FUNC(int, epoll_ctl, int epfd, int op, int fd, void *event) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-//   fprintf(stderr, "T%d socket epoll_ctl: %d\n", VALGRIND_TS_THREAD_ID(), epfd);
-   o = SocketMagic(epfd);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_WWWW(ret, fn, epfd, op, fd, event);
-   return ret;
-}
-
-PTH_FUNC(long, send, int s, void *buf, long len, int flags) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-//   fprintf(stderr, "T%d socket send: %d %ld\n", VALGRIND_TS_THREAD_ID(), s, len);
-   o = SocketMagic(s);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_WWWW(ret, fn, s, buf, len, flags);
-   return ret;
-}
-
-PTH_FUNC(long, sendmsg, int s, void *msg, int flags) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   o = SocketMagic(s);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_WWW(ret, fn, s, msg, flags);
-   return ret;
-}
-
-// TODO(timurrrr): sendto
-
-PTH_FUNC(long, recv, int s, void *buf, long len, int flags) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   CALL_FN_W_WWWW(ret, fn, s, buf, len, flags);
-//   fprintf(stderr, "T%d socket recv: %d %ld %ld\n", VALGRIND_TS_THREAD_ID(), s, len, ret);
-   o = SocketMagic(s);
-   if (ret >= 0) {
-      // Do client request only if we received something
-      // or the connection was closed.
-      do_wait(o);
-   }
-   return ret;
-}
-
-PTH_FUNC(long, recvmsg, int s, void *msg, int flags) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   CALL_FN_W_WWW(ret, fn, s, msg, flags);
-   o = SocketMagic(s);
-   if (ret >= 0) {
-      // Do client request only if we received something
-      // or the connection was closed.
-      do_wait(o);
-   }
-   return ret;
-}
-
-// TODO(timurrrr): recvfrom
-
-PTH_FUNC(long, read, int s, void *a2, long count) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   CALL_FN_W_WWW(ret, fn, s, a2, count);
-//   fprintf(stderr, "T%d socket read: %d %ld %ld\n", VALGRIND_TS_THREAD_ID(), s, count, ret);
-   o = SocketMagic(s);
-   if (ret >= 0) {
-      // Do client request only if we read something or the EOF was reached.
-      do_wait(o);
-   }
-   return ret;
-}
-
-PTH_FUNC(long, write, int s, void *a2, long a3) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-//   fprintf(stderr, "T%d socket write: %d\n", VALGRIND_TS_THREAD_ID(), s);
-   o = SocketMagic(s);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_WWW(ret, fn, s, a2, a3);
-   return ret;
-}
-
-/* Linux: unlink
- * Darwin: unlink */
-LIBC_FUNC(long, unlink, void *path) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   o = SocketMagic((long)path);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_W(ret, fn, path);
-   return ret;
-}
-
-/* Linux: open
- * Darwin: open$NOCANCEL$UNIX2003 */
-static int open_WRK(void *path, int flags, int mode) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   o = SocketMagic((long)path);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_WWW(ret, fn, path, flags, mode);
-   do_wait(o);
-   return ret;
-}
-
-LIBC_FUNC(int, open, void *path, int flags, int mode) {
-  return open_WRK(path, flags, mode);
-}
-LIBC_FUNC(int, open$Za, void *path, int flags, int mode) {
-  return open_WRK(path, flags, mode);
-}
-
-/* Linux: rmdir
- * Darwin: rmdir */
-LIBC_FUNC(int, rmdir, void *path) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   o = SocketMagic((long)path);
-   DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-   CALL_FN_W_W(ret, fn, path);
-   return ret;
-}
-
-/* Linux: opendir
- * Darwin: opendir$UNIX2003 */
-static long opendir_WRK(void *path) {
-   OrigFn fn;
-   long    ret;
-   void *o;
-   VALGRIND_GET_ORIG_FN(fn);
-   CALL_FN_W_W(ret, fn, path);
-   o = SocketMagic((long)path);
-   do_wait(o);
-   return ret;
-}
-
-LIBC_FUNC(long, opendir, void *path) {
-  return opendir_WRK(path);
-}
-
-LIBC_FUNC(long, opendir$Za, void *path) {
-  return opendir_WRK(path);
-}
-
-#if !defined(ANDROID)
-LIBC_FUNC(int, lockf, int fd, int cmd, OFF_T offset) {
-  OrigFn fn;
-  void *o;
-  long ret;
-  VALGRIND_GET_ORIG_FN(fn);
-  o = SocketMagic(fd);
-  if (cmd == F_ULOCK) {
-    DO_CREQ_v_W(TSREQ_SIGNAL, void*, o);
-  }
-  CALL_FN_W_2WO_T(ret, fn, fd, cmd, offset);
-  if (cmd == F_LOCK && ret == 0) {
-    do_wait(o);
-  }
-  return ret;
-}
-#endif
-
-/*
-  Support for pthread_once and function-level static objects.
-
-  pthread_once is supported by simply ignoring everything that happens
-  inside pthread_once.
-
-  Another approach would be to SIGNAL when pthread_once with a given
-  pthread_once_t is called for the first time and to WAIT after
-  each pthread_once. But implementing this is a bit tricky and probably
-  not worth it.
-
-  Thread safe initialization of function-level static objects is
-  supported in gcc (strarting from 4.something).
-  From gcc/cp/decl.c:
-  --------------------------------------------------------------
-       Emit code to perform this initialization but once.  This code
-       looks like:
-
-       static <type> guard;
-       if (!guard.first_byte) {
-         if (__cxa_guard_acquire (&guard)) {
-           bool flag = false;
-           try {
-             // Do initialization.
-             flag = true; __cxa_guard_release (&guard);
-             // Register variable for destruction at end of program.
-            } catch {
-           if (!flag) __cxa_guard_abort (&guard);
-          }
-       }
-  --------------------------------------------------------------
-  So, when __cxa_guard_acquire returns true, we start ignoring all accesses
-  and in __cxa_guard_release we stop ignoring them.
-  We also need to ignore all accesses inside these two functions.
-
-  For examples, see test106 and test108 at
-  http://code.google.com/p/data-race-test/source/browse/trunk/unittest/racecheck_unittest.cc
-*/
-
-PTH_FUNC(int, pthreadZuonce, void *ctl, void *rtn) {
-   OrigFn fn;
-   int    ret;
-   VALGRIND_GET_ORIG_FN(fn);
-   IGNORE_ALL_ACCESSES_BEGIN();
-   // fprintf(stderr, "T%d: ->pthread_once\n", VALGRIND_TS_THREAD_ID);
-   CALL_FN_W_WW(ret, fn, ctl, rtn);
-   // fprintf(stderr, "T%d: <-pthread_once\n", VALGRIND_TS_THREAD_ID);
-   IGNORE_ALL_ACCESSES_END();
-   return ret;
-}
-
-LIBSTDCXX_FUNC(long, ZuZucxaZuguardZuacquire, void *p) {
-   OrigFn fn;
-   long    ret;
-   VALGRIND_GET_ORIG_FN(fn);
-   // fprintf(stderr, "T%d: ->__cxa_guard_acquire\n", VALGRIND_TS_THREAD_ID());
-   IGNORE_ALL_ACCESSES_BEGIN();
-   CALL_FN_W_W(ret, fn, p);
-   // fprintf(stderr, "T%d: <-__cxa_guard_acquire\n", VALGRIND_TS_THREAD_ID());
-   if (!ret) {
-     IGNORE_ALL_ACCESSES_END();
-   }
-   return ret;
-}
-LIBSTDCXX_FUNC(long, ZuZucxaZuguardZurelease, void *p) {
-   OrigFn fn;
-   long    ret;
-   VALGRIND_GET_ORIG_FN(fn);
-   // fprintf(stderr, "T%d: ->__cxa_guard_release\n", VALGRIND_TS_THREAD_ID());
-   CALL_FN_W_W(ret, fn, p);
-   // fprintf(stderr, "T%d: <-__cxa_guard_release\n", VALGRIND_TS_THREAD_ID());
-   IGNORE_ALL_ACCESSES_END();
-   return ret;
-}
-
-
-
-
-/*----------------------------------------------------------------*/
-/*--- Replace glibc's wretched optimised string fns (again!)   ---*/
-/*----------------------------------------------------------------*/
-/* Why we have to do all this nonsense:
-
-   Some implementations of strlen may read up to 7 bytes past the end
-   of the string thus touching memory which may not belong to this
-   string.
-
-   Such race is benign because the data read past the end of the
-   string is not used.
-*/
-// --- MEMCPY -----------------------------------------------------
-//
-#define MEMCPY(soname, fnname) \
-   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-            ( void *dst, const void *src, SizeT len ); \
-   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-            ( void *dst, const void *src, SizeT len ) \
-   { return Replace_memcpy(dst, src, len); }
-
-MEMCPY(VG_Z_LIBC_SONAME, memcpy)
-MEMCPY(NONE, memcpy)
-/* icc9 blats these around all over the place.  Not only in the main
-   executable but various .so's.  They are highly tuned and read
-   memory beyond the source boundary (although work correctly and
-   never go across page boundaries), so give errors when run natively,
-   at least for misaligned source arg.  Just intercepting in the exe
-   only until we understand more about the problem.  See
-   http://bugs.kde.org/show_bug.cgi?id=139776
- */
-MEMCPY(NONE, _intel_fast_memcpy)
-#if defined(VGO_linux)
-MEMCPY(VG_Z_LIBC_SONAME, __GI_memcpy);
-#endif
-
-// --- MEMMOVE -----------------------------------------------------
-//
-#define MEMMOVE(soname, fnname) \
-   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-            ( void *dst, const void *src, SizeT len ); \
-   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-            ( void *dst, const void *src, SizeT len ) \
-   { return Replace_memmove(dst, src, len); }
-
-MEMMOVE(VG_Z_LIBC_SONAME, memmove)
-MEMMOVE(NONE, memmove)
-#if defined(VGO_linux)
-MEMMOVE(VG_Z_LIBC_SONAME, __GI_memmove);
-#endif
-
-
-// --- STRCHR and INDEX -------------------------------------------
-//
-#define STRCHR(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ); \
-   char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ) \
-   { return Replace_strchr(s, c); }
-
-// Apparently index() is the same thing as strchr()
-STRCHR(VG_Z_LIBC_SONAME, strchr)
-STRCHR(VG_Z_LIBC_SONAME, index)
-STRCHR(NONE,             strchr)
-STRCHR(NONE,             index)
-#if defined(VGO_linux)
-STRCHR(VG_Z_LIBC_SONAME, __GI_strchr)
-#endif
-
-// --- STRCHRNUL --------------------------------------------------
-//
-#define STRCHRNUL(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ); \
-   char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ) \
-   { return Replace_strchrnul(s, c); }
-
-STRCHRNUL(VG_Z_LIBC_SONAME, strchrnul)
-STRCHRNUL(NONE,             strchrnul)
-#if defined(VGO_linux)
-STRCHRNUL(VG_Z_LIBC_SONAME, __GI_strchrnul)
-#endif
-
-// --- STRRCHR RINDEX -----------------------------------------------------
-//
-#define STRRCHR(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str, int c ); \
-   char* VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str, int c ) \
-   { return Replace_strrchr(str, c); }
-
-// Apparently rindex() is the same thing as strrchr()
-STRRCHR(VG_Z_LIBC_SONAME, strrchr)
-STRRCHR(VG_Z_LIBC_SONAME, rindex)
-STRRCHR(NONE,             strrchr)
-STRRCHR(NONE,             rindex)
-#if defined(VGO_linux)
-STRRCHR(VG_Z_LIBC_SONAME, __GI_strrchr)
-#endif
-
-// --- STRCMP -----------------------------------------------------
-//
-#define STRCMP(soname, fnname) \
-   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-          ( const char* s1, const char* s2 ); \
-   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-          ( const char* s1, const char* s2 ) \
-   { return Replace_strcmp(s1, s2); }
-
-STRCMP(VG_Z_LIBC_SONAME, strcmp)
-STRCMP(NONE,             strcmp)
-#if defined(VGO_linux)
-STRCMP(VG_Z_LIBC_SONAME, __GI_strcmp)
-#endif
-
-#define MEMCMP(soname, fnname) \
-   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-          ( const char* s1, const char* s2 , size_t n); \
-   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-          ( const char* s1, const char* s2 , size_t n) \
-   { return Replace_memcmp(s1, s2, n); }
-
-MEMCMP(VG_Z_LIBC_SONAME, __memcmp_ssse3)
-MEMCMP(VG_Z_LIBC_SONAME, memcmp)
-MEMCMP(NONE,             memcmp)
-#if defined(VGO_linux)
-MEMCMP(VG_Z_LIBC_SONAME, __GI_memcmp)
-#endif
-
-#define MEMCHR(soname, fnname) \
-   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const void *s, int c, SizeT n); \
-   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const void *s, int c, SizeT n) \
-   { return Replace_memchr(s, c, n); }
-
-MEMCHR(VG_Z_LIBC_SONAME, memchr)
-MEMCHR(NONE, memchr)
-
-#define STRNCMP(soname, fnname) \
-   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-          ( const char* s1, const char* s2, size_t n); \
-   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
-          ( const char* s1, const char* s2, size_t n) \
-   { return Replace_strncmp(s1, s2, n); }
-
-STRNCMP(VG_Z_LIBC_SONAME, strncmp)
-STRNCMP(NONE,             strncmp)
-#if defined(VGO_linux)
-STRNCMP(VG_Z_LIBC_SONAME, __GI_strncmp)
-#endif
-
-// --- STRLEN -----------------------------------------------------
-//
-// Note that this replacement often doesn't get used because gcc inlines
-// calls to strlen() with its own built-in version.  This can be very
-// confusing if you aren't expecting it.  Other small functions in this file
-// may also be inline by gcc.
-#define STRLEN(soname, fnname) \
-   SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ); \
-   SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ) \
-   { return Replace_strlen(str); }
-
-STRLEN(VG_Z_LIBC_SONAME, strlen)
-STRLEN(NONE,             strlen)
-#if defined(VGO_linux)
-STRLEN(VG_Z_LIBC_SONAME, __GI_strlen)
-#endif
-
-// --- STRCPY -----------------------------------------------------
-//
-#define STRCPY(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ); \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ) \
-   { return Replace_strcpy(dst, src); }
-
-STRCPY(VG_Z_LIBC_SONAME, strcpy)
-STRCPY(NONE,             strcpy)
-#if defined(VGO_linux)
-STRCPY(VG_Z_LIBC_SONAME, __GI_strcpy)
-#endif
-
-// --- STRNCPY -----------------------------------------------------
-//
-#define STRNCPY(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src, size_t n ); \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src, size_t n ) \
-   { return Replace_strncpy(dst, src, n); }
-
-STRNCPY(VG_Z_LIBC_SONAME, strncpy)
-STRNCPY(NONE,             strncpy)
-#if defined(VGO_linux)
-STRNCPY(VG_Z_LIBC_SONAME, __GI_strncpy)
-#endif
-
-// --- STRCAT -----------------------------------------------------
-//
-#define STRCAT(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src); \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src) \
-   { return Replace_strcat(dst, src); }
-
-STRCAT(VG_Z_LIBC_SONAME, strcat)
-STRCAT(NONE,             strcat)
-#if defined(VGO_linux)
-STRCAT(VG_Z_LIBC_SONAME, __GI_strcat)
-#endif
-
-// --- STPCPY -----------------------------------------------------
-//
-#define STPCPY(soname, fnname) \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ); \
-   char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ) \
-   { return Replace_stpcpy(dst, src); }
-
-STPCPY(VG_Z_LIBC_SONAME, stpcpy)
-STPCPY(NONE,             stpcpy)
-#if defined(VGO_linux)
-STPCPY(VG_Z_LIBC_SONAME, __GI_stpcpy)
-#endif
-
-//------------------------ Annotations ---------------- {{{1
-
-
-
-#define ANN_FUNC(ret_ty, f, args...) \
-    ret_ty I_WRAP_SONAME_FNNAME_ZZ(Za,f)(args); \
-    ret_ty I_WRAP_SONAME_FNNAME_ZZ(Za,f)(args)
-
-
-#define ANN_TRACE(args...) \
-    do{\
-      if(TRACE_ANN_FNS){\
-        int tid = VALGRIND_TS_THREAD_ID();\
-        int sid = VALGRIND_TS_SEGMENT_ID();\
-        fprintf(stderr, args);\
-        if(tid != 999999 && sid != 999999) fflush(stderr);\
-      }\
-    }while(0)
-
-ANN_FUNC(int, RunningOnValgrind, void) {
-  return 1;
-}
-
-ANN_FUNC(const char *, ThreadSanitizerQuery, const char *query) {
-  Word res;
-  DO_CREQ_W_WW(res, TSREQ_THREAD_SANITIZER_QUERY, const char*, query, long, 0);
-  return (const char *)res;
-}
-
-ANN_FUNC(void, AnnotateFlushState, const char *unused_file, int unused_line) {
-  DO_CREQ_v_v(TSREQ_FLUSH_STATE);
-}
-
-ANN_FUNC(void, AnnotateRWLockCreate, const char *file, int line, void *lock)
-{
-  const char *name = "AnnotateRWLockCreate";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, lock, file, line);
-  DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_CREATE_POST, void*, lock, long, 0 /*non recur*/);
-}
-
-ANN_FUNC(void, AnnotateRWLockDestroy, const char *file, int line, void *lock)
-{
-  const char *name = "AnnotateRWLockDestroy";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, lock, file, line);
-  DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_DESTROY_PRE, void*, lock);
-}
-
-ANN_FUNC(void, AnnotateRWLockAcquired, const char *file, int line, void *lock, int is_w)
-{
-  const char *name = "AnnotateRWLockAcquired";
-  ANN_TRACE("--#%d %s[%p] rw=%d %s:%d\n", tid, name, lock, is_w, file, line);
-  DO_CREQ_v_WW(TSREQ_PTHREAD_RWLOCK_LOCK_POST,  void*,lock,long, (long)is_w);
-}
-
-ANN_FUNC(void, AnnotateRWLockReleased, const char *file, int line, void *lock, int is_w)
-{
-  const char *name = "AnnotateRWLockReleased";
-  ANN_TRACE("--#%d %s[%p] rw=%d %s:%d\n", tid, name, lock, is_w, file, line);
-  DO_CREQ_v_W(TSREQ_PTHREAD_RWLOCK_UNLOCK_PRE, void*, lock);
-}
-
-ANN_FUNC(void, AnnotateCondVarWait, const char *file, int line, void *cv, void *lock)
-{
-  const char *name = "AnnotateCondVarWait";
-  ANN_TRACE("--#%d %s[%p|%p] %s:%d\n", tid, name, cv, lock, file, line);
-  do_wait(cv);
-}
-
-ANN_FUNC(void, AnnotateCondVarSignal, const char *file, int line, void *cv)
-{
-  const char *name = "AnnotateCondVarSignal";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, cv, file, line);
-  DO_CREQ_v_W(TSREQ_SIGNAL, void*,cv);
-}
-
-ANN_FUNC(void, AnnotateCondVarSignalAll, const char *file, int line, void *cv)
-{
-  const char *name = "AnnotateCondVarSignalAll";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, cv, file, line);
-  DO_CREQ_v_W(TSREQ_SIGNAL, void*,cv);
-}
-
-ANN_FUNC(void, AnnotateHappensBefore, const char *file, int line, void *obj)
-{
-  const char *name = "AnnotateHappensBefore";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, obj, file, line);
-  DO_CREQ_v_W(TSREQ_SIGNAL, void*, obj);
-}
-
-ANN_FUNC(void, WTFAnnotateHappensBefore, const char *file, int line, void *obj)
-{
-  const char *name = "WTFAnnotateHappensBefore";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, obj, file, line);
-  DO_CREQ_v_W(TSREQ_SIGNAL, void*, obj);
-}
-
-ANN_FUNC(void, AnnotateHappensAfter, const char *file, int line, void *obj)
-{
-  const char *name = "AnnotateHappensAfter";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, obj, file, line);
-  do_wait(obj);
-}
-
-ANN_FUNC(void, WTFAnnotateHappensAfter, const char *file, int line, void *obj)
-{
-  const char *name = "WTFAnnotateHappensAfter";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, obj, file, line);
-  do_wait(obj);
-}
-
-ANN_FUNC(void, AnnotatePCQCreate, const char *file, int line, void *pcq)
-{
-  const char *name = "AnnotatePCQCreate";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, pcq, file, line);
-  DO_CREQ_v_W(TSREQ_PCQ_CREATE,   void*,pcq);
-}
-
-ANN_FUNC(void, AnnotatePCQDestroy, const char *file, int line, void *pcq)
-{
-  const char *name = "AnnotatePCQDestroy";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, pcq, file, line);
-  DO_CREQ_v_W(TSREQ_PCQ_DESTROY,   void*,pcq);
-}
-
-ANN_FUNC(void, AnnotatePCQPut, const char *file, int line, void *pcq)
-{
-  const char *name = "AnnotatePCQPut";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, pcq, file, line);
-  DO_CREQ_v_W(TSREQ_PCQ_PUT,   void*,pcq);
-}
-
-ANN_FUNC(void, AnnotatePCQGet, const char *file, int line, void *pcq)
-{
-  const char *name = "AnnotatePCQGet";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, pcq, file, line);
-  DO_CREQ_v_W(TSREQ_PCQ_GET,   void*,pcq);
-}
-
-ANN_FUNC(void, AnnotateExpectRace, const char *file, int line, void *mem, char *description)
-{
-  const char *name = "AnnotateExpectRace";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mem, file, line);
-  DO_CREQ_v_WW(TSREQ_EXPECT_RACE, void*,mem, char*,description);
-}
-
-ANN_FUNC(void, AnnotateFlushExpectedRaces, const char *file, int line)
-{
-  const char *name = __FUNCTION__;
-  ANN_TRACE("--#%d %s\n", tid, name);
-  DO_CREQ_v_v(TSREQ_FLUSH_EXPECTED_RACES);
-}
-
-ANN_FUNC(void, AnnotateBenignRace, const char *file, int line, void *mem, char *description)
-{
-  const char *name = "AnnotateBenignRace";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mem, file, line);
-  DO_CREQ_v_WWW(TSREQ_BENIGN_RACE, void*,mem, long, 1, char*,description);
-}
-
-ANN_FUNC(void, AnnotateBenignRaceSized, const char *file, int line, void *mem, long size, char *description)
-{
-  const char *name = "AnnotateBenignRace";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mem, file, line);
-  DO_CREQ_v_WWW(TSREQ_BENIGN_RACE, char*,(char*)mem, long, size,
-                char*,description);
-}
-
-ANN_FUNC(void, WTFAnnotateBenignRaceSized, const char *file, int line, void *mem, long size, char *description)
-{
-  const char *name = "WTFAnnotateBenignRace";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mem, file, line);
-  DO_CREQ_v_WWW(TSREQ_BENIGN_RACE, char*,(char*)mem, long, size,
-                char*,description);
-}
-
-
-ANN_FUNC(void, AnnotateNewMemory, char *file, int line, void *mem, long size)
-{
-  const char *name = "AnnotateNewMemory";
-  ANN_TRACE("--#%d %s[%p,%d] %s:%d\n", tid, name, mem, (int)size, file, line);
-  DO_CREQ_v_WWWW(TSREQ_CLEAN_MEMORY, void*,mem, long, size, char*, file, long, (long)line);
-}
-
-ANN_FUNC(void, AnnotatePublishMemoryRange, char *file, int line, void *mem, long size)
-{
-  const char *name = "AnnotatePublishMemoryRange";
-  ANN_TRACE("--#%d %s[%p,%d] %s:%d\n", tid, name, mem, (int)size, file, line);
-  DO_CREQ_v_WW(TSREQ_PUBLISH_MEMORY_RANGE,   void*, mem, long, size);
-}
-
-ANN_FUNC(void, AnnotateUnpublishMemoryRange, char *file, int line, void *mem, long size)
-{
-  const char *name = "AnnotateUnpublishMemoryRange";
-  ANN_TRACE("--#%d %s[%p,%d] %s:%d\n", tid, name, mem, (int)size, file, line);
-  DO_CREQ_v_WW(TSREQ_UNPUBLISH_MEMORY_RANGE,   void*, mem, long, size);
-}
-
-ANN_FUNC(void, AnnotateIgnoreReadsBegin, char *file, int line, void *mu)
-{
-  const char *name = "AnnotateIgnoreReadsBegin";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_IGNORE_READS_BEGIN,   void*, mu);
-}
-
-ANN_FUNC(void, AnnotateIgnoreReadsEnd, char *file, int line, void *mu)
-{
-  const char *name = "AnnotateIgnoreReadsEnd";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_IGNORE_READS_END,   void*, mu);
-}
-
-ANN_FUNC(void, AnnotateIgnoreWritesBegin, char *file, int line, void *mu)
-{
-  const char *name = "AnnotateIgnoreWritesBegin";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_IGNORE_WRITES_BEGIN,   void*, mu);
-}
-
-ANN_FUNC(void, AnnotateIgnoreWritesEnd, char *file, int line, void *mu)
-{
-  const char *name = "AnnotateIgnoreWritesEnd";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_IGNORE_WRITES_END,   void*, mu);
-}
-
-ANN_FUNC(void, AnnotateIgnoreSyncBegin, char* file, int line, void *mu)
-{
-  const char *name = "AnnotateIgnoreSyncBegin";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_IGNORE_ALL_SYNC_BEGIN,  void*, mu);
-}
-
-ANN_FUNC(void, AnnotateIgnoreSyncEnd, char* file, int line, void *mu)
-{
-  const char *name = "AnnotateIgnoreSyncEnd";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_IGNORE_ALL_SYNC_END,  void*, mu);
-}
-
-ANN_FUNC(void, AnnotateEnableRaceDetection, char *file, int line, int enable)
-{
-  const char *name = "AnnotateEnableRaceDetection";
-  ANN_TRACE("--#%d %s[%d] %s:%d\n", tid, name, enable, file, line);
-  DO_CREQ_v_W(enable == 0 ? TSREQ_GLOBAL_IGNORE_ON : TSREQ_GLOBAL_IGNORE_OFF,
-              long, 0);
-}
-
-ANN_FUNC(void, AnnotateThreadName, char *file, int line, const char *thread_name)
-{
-  const char *name = "AnnotateThreadName";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, thread_name, file, line);
-  DO_CREQ_v_W(TSREQ_SET_THREAD_NAME, const char *, thread_name);
-}
-
-ANN_FUNC(void, AnnotateMutexIsUsedAsCondVar, char *file, int line, void *mu)
-{
-  const char *name = "AnnotateMutexIsUsedAsCondVar";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_MUTEX_IS_USED_AS_CONDVAR,   void*, mu);
-}
-
-ANN_FUNC(void, AnnotateMutexIsNotPHB, char *file, int line, void *mu)
-{
-  const char *name = "AnnotateMutexIsNotPhb";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mu, file, line);
-  DO_CREQ_v_W(TSREQ_MUTEX_IS_NOT_PHB, void*, mu);
-}
-
-ANN_FUNC(void, AnnotateTraceMemory, char *file, int line, void *mem)
-{
-  const char *name = "AnnotateTraceMemory";
-  ANN_TRACE("--#%d %s[%p] %s:%d\n", tid, name, mem, file, line);
-  DO_CREQ_v_W(TSREQ_TRACE_MEM,   void*, mem);
-}
-
-#undef TRACE_ANN_FNS
-#define TRACE_ANN_FNS 1
-
-ANN_FUNC(void, AnnotateNoOp, char *file, int line, void *mem)
-{
-  const char *name = "AnnotateNoOp";
-  IGNORE_ALL_ACCESSES_BEGIN();
-  ANN_TRACE("--#%d/%d %s[%p] %s:%d\n", tid, sid, name, mem, file, line);
-  IGNORE_ALL_ACCESSES_END();
-}
-
-ANN_FUNC(void, AnnotateSetVerbosity, char *file, int line, void *mem)
-{
-  const char *name = "AnnotateSetVerbosity";
-  OrigFn fn;
-  VALGRIND_GET_ORIG_FN(fn);
-  fprintf(stderr, "%s fn=%p\n", name, (void*)fn.nraddr);
-  ANN_TRACE("--#%d/%d %s[%p] %s:%d\n", tid, sid, name, mem, file, line);
-}
-
-
-
-//-------------- NaCl Support -------------- {{{1
-// A bit hackish implementation of NaCl support.
-// We need to notify the valgrind core about
-//   a) nacl memory range
-//   b) nacl .nexe file
-#include "coregrind/pub_core_clreq.h"
-
-void I_WRAP_SONAME_FNNAME_ZZ(NONE, NaClSandboxMemoryStartForValgrind) (void *mem_start);
-void I_WRAP_SONAME_FNNAME_ZZ(NONE, NaClSandboxMemoryStartForValgrind) (void *mem_start) {
-  OrigFn fn;
-  int res;
-  VALGRIND_GET_ORIG_FN(fn);
-  CALL_FN_v_W(fn, mem_start);
-  VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__NACL_MEM_START, mem_start, 0, 0, 0, 0);
-}
-
-int I_WRAP_SONAME_FNNAME_ZZ(NONE, NaClFileNameForValgrind) (char *file);
-int I_WRAP_SONAME_FNNAME_ZZ(NONE, NaClFileNameForValgrind) (char *file) {
-  OrigFn fn;
-  int ret, res;
-  VALGRIND_GET_ORIG_FN(fn);
-  CALL_FN_W_W(ret, fn, file);
-  VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__NACL_FILE, file, 0, 0, 0, 0);
-  return ret;
-}
-
-void I_WRAP_SONAME_FNNAME_ZZ(NONE, NaClFileMappingForValgrind) (UWord vma, UWord size, UWord file_offset);
-void I_WRAP_SONAME_FNNAME_ZZ(NONE, NaClFileMappingForValgrind) (UWord vma, UWord size, UWord file_offset) {
-  OrigFn fn;
-  int res;
-  VALGRIND_GET_ORIG_FN(fn);
-  CALL_FN_v_WWW(fn, vma, size, file_offset);
-  VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__NACL_MMAP, vma, size, file_offset, 0, 0);
-}
-
-
-//-------------- Functions to Ignore -------------- {{{1
-// For some functions we want to ignore everything that happens
-// after they were called and before they returned.
-// Is there any way that allows to do this via a command line?
-#define WRAP_AND_IGNORE(soname, fnname) \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *a1, void *a2, void *a3, void *a4); \
-  void* I_WRAP_SONAME_FNNAME_ZU(soname,fnname) (void *a1, void *a2, void *a3, void *a4) { \
-    void* ret; \
-    OrigFn fn;\
-    VALGRIND_GET_ORIG_FN(fn);\
-    IGNORE_ALL_ACCESSES_BEGIN(); \
-      CALL_FN_W_WWWW(ret, fn, a1, a2, a3, a4); \
-    IGNORE_ALL_ACCESSES_END(); \
-    return ret; \
-  }
-
-WRAP_AND_IGNORE(NONE, getenv);
-
-// {{{1 end
-// vim:shiftwidth=2:softtabstop=2:expandtab
diff --git a/tsan/ts_valgrind_libc.cc b/tsan/ts_valgrind_libc.cc
deleted file mode 100644
index c73ed6a..0000000
--- a/tsan/ts_valgrind_libc.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Copyright (c) 2008-2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// This file is part of ThreadSanitizer, a dynamic data race detector.
-// Author: Konstantin Serebryany.
-// Implement some of the libc functions to link with valgrind.
-// Do not include any linux header here to avoid conflicts.
-//
-extern "C" {
-#include "pub_tool_basics.h"
-#include "pub_tool_libcbase.h"
-}
-
-// can't use VG_(memmove) since it is buggy.
-extern "C" void * memmove(void *a, const void *b, unsigned long size) {
-  char *A = (char*)a;
-  const char *B = (const char*)b;
-  if (A < B) {
-    for (unsigned long i = 0; i < size; i++) {
-      A[i] = B[i];
-    }
-  } else if(A > B) {
-    for (unsigned long i = 0; i < size; i++) {
-      A[size - i - 1] = B[size - i - 1];
-    }
-  }
-  return a;
-}
-
-extern "C" int memcmp(const void *a, const void *b, unsigned long c) {
-  return VG_(memcmp)(a,b,c);
-}
-#ifndef VGO_darwin
-extern "C" void* __memcpy_chk(void *dest, const void *src, unsigned long n) {
-   return VG_(memcpy)(dest,src,n);
-}
-#endif
diff --git a/tsan/tsan-debug.bat b/tsan/tsan-debug.bat
deleted file mode 100755
index 900234d..0000000
--- a/tsan/tsan-debug.bat
+++ /dev/null
@@ -1,3 +0,0 @@
-@echo off
-"%~dp0\ia32\bin\pin.exe" -p32 "%~dp0\ia32\bin\pin.exe" -follow_execv -t "%~dp0\x86-windows-debug-ts_pin.dll"   -short_name %* 
-
diff --git a/tsan/tsan.bat b/tsan/tsan.bat
deleted file mode 100755
index 53d78c6..0000000
--- a/tsan/tsan.bat
+++ /dev/null
@@ -1,3 +0,0 @@
-@echo off
-"%~dp0\ia32\bin\pin.exe" -p32 "%~dp0\ia32\bin\pin.exe" -follow_execv -t "%~dp0\x86-windows-ts_pin.dll"   -short_name %* 
-
diff --git a/tsan/tsan_dynamorio.sh b/tsan/tsan_dynamorio.sh
deleted file mode 100755
index 8b87d1a..0000000
--- a/tsan/tsan_dynamorio.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-DYNAMORIO_ROOT=${DYNAMORIO_ROOT:=$HOME/DynamoRIO}
-TS_ROOT=${TS_ROOT:-`dirname $0`}
-TS_VARIANT=-debug
-
-TS_FLAGS=" "
-
-for arg in "$@"; do
-  case $arg in
-    --opt) TS_VARIANT="";;
-    --dbg) TS_VARIANT="-debug";;
-    --) shift; break;;
-    -64) ARCH="amd64"; BITNESS="64";;
-    -32) ARCH="x86"; BITNESS="32";;
-    -*) TS_FLAGS="$TS_FLAGS $arg";;
-    *) break;;
-  esac
-  shift
-done
-
-PROGRAM="$1"
-shift
-PARAMS="$@"
-
-# detect bitness if not given explicitly.
-if [ "$BITNESS" == "" ]; then
-  file_format=`objdump -f  $PROGRAM | grep -o 'file format elf.*'`
-  echo $file_format
-  if [ "$file_format" == "file format elf64-x86-64" ]; then
-    BITNESS=64
-    ARCH=amd64
-  else
-    BITNESS=32
-    ARCH=x86
-  fi
-fi
-
-SYMBOLS_FILE="$(mktemp symbols.XXXXXX)"
-nm $PROGRAM > $SYMBOLS_FILE
-TS_FLAGS="$TS_FLAGS --symbols=$SYMBOLS_FILE"
-
-$DYNAMORIO_ROOT/bin$BITNESS/drdeploy \
-   -client $TS_ROOT/bin/$ARCH-linux-debug-ts_dynamorio.so 0 "$TS_FLAGS" \
-   $PROGRAM $PARAMS
-rm $SYMBOLS_FILE
diff --git a/tsan/tsan_mt.bat b/tsan/tsan_mt.bat
deleted file mode 100755
index 9a5dddc..0000000
--- a/tsan/tsan_mt.bat
+++ /dev/null
@@ -1,3 +0,0 @@
-@echo off
-"%~dp0\ia32\bin\pin.exe" -p32 "%~dp0\ia32\bin\pin.exe" -follow_execv -t "%~dp0\x86-windows-ts_pinmt.dll"   -short_name %*
-
diff --git a/tsan/tsan_pin.sh b/tsan/tsan_pin.sh
deleted file mode 100755
index 2a2ed63..0000000
--- a/tsan/tsan_pin.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash
-
-PIN_ROOT=${PIN_ROOT:-$HOME/pin}
-TS_ROOT=${TS_ROOT:-`dirname $0`}
-TS_VARIANT=-debug
-MT=
-
-UNAME_OS=`uname -o`
-if [ "$UNAME_OS" == "GNU/Linux" ]; then
-  PIN_BINARY=${PIN_BINARY:-pin}
-  DLL=so
-  OS=linux
-elif [ "$UNAME_OS" == "Cygwin" ]; then
-  PIN_BINARY=${PIN_BINARY:-pin.bat}
-  DLL=dll
-  OS=windows
-fi
-
-export MSM_THREAD_SANITIZER=1
-export INSTRUMENTATION_FRAMEWORK=PIN_LINUX
-
-
-FOLLOW=-follow_execv
-PIN_FLAGS=${PIN_FLAGS:-""}
-
-TS_FLAGS="-short_name"
-PIN_FLAGS=""
-
-VERBOZE=0
-
-for arg in "$@"; do
-  case $arg in
-    --opt) TS_VARIANT="";;
-    --dbg) TS_VARIANT="-debug";;
-    --mt)  MT=mt;;
-    --v=[1-9]) VERBOZE=1; TS_FLAGS="$TS_FLAGS $arg";;
-    --) shift; break;;
-    -*) TS_FLAGS="$TS_FLAGS $arg";;
-    *) break;;
-  esac
-  shift
-done
-
-
-ulimit -c 0 # core make very little sense here
-
-if [ $VERBOZE == "1" ] ; then
-  printf "PIN_ROOT   : %s\n" "$PIN_ROOT"
-  printf "PIN_BINARY : %s\n" "$PIN_BINARY"
-  printf "PIN_FLAGS  : %s\n" "$PIN_FLAGS"
-  printf "TS_ROOT    : %s\n" "$TS_ROOT"
-  printf "TS_VARIANT : %s\n" "$TS_VARIANT"
-  printf "TS_FLAGS   : %s\n" "$TS_FLAGS"
-  printf "PARAMS     : %s\n" "$*"
-fi
-
-if [ "$TS_VARIANT" == "-debug" ] ; then
-  TS_FLAGS="$TS_FLAGS -slow_asserts"
-fi
-
-TS_PARAMS="$@"
-
-run() {
-  echo $@
-  $@
-}
-
-run $PIN_ROOT/$PIN_BINARY $PIN_FLAGS $FOLLOW \
-  -t64 $TS_ROOT/bin/amd64-$OS${TS_VARIANT}-ts_pin$MT.$DLL \
-  -t   $TS_ROOT/bin/x86-$OS${TS_VARIANT}-ts_pin$MT.$DLL \
- $TS_FLAGS -- $TS_PARAMS
-
