Cleanup pixel ref mutexes in Skia

Mutexes in pixelrefs were done very sloppily initially. The code (a) assumes all
pixelref subclasses want a mutex to guard their lock/unlock virtuals, and (b)
most subclasses use the same mutex for *all* of their instances, even when there
is no explicit need to guard modifying one instances with another.

When we try drawing bitmaps from multiple threads, we are seeing a lot of slow-
down from these mutexes. This CL has two changes to try to speed things up.

1. Add setPreLocked(), for pixelrefs who never need the onLockPixels
virtual to be called. This speeds up those subclasses in multithreaded environs
as it avoids the mutex lock all together (e.g. SkMallocPixelRef).

2. Add setMutex() to allow a subclass to change the mutex choice. ashmem wants
this, since its unflattening constructor cannot pass down the null, it needs
to cleanup afterwards.

see https://codereview.appspot.com/6199075/

bug: 6469917
Change-Id: I81a7cfa0b2ead5a42059697eafa58de1e7a87da2
diff --git a/include/core/SkPixelRef.h b/include/core/SkPixelRef.h
index d5f6ab2..f01ba15 100644
--- a/include/core/SkPixelRef.h
+++ b/include/core/SkPixelRef.h
@@ -63,9 +63,10 @@
     */
     SkColorTable* colorTable() const { return fColorTable; }
 
-    /** Return the current lockcount (defaults to 0)
-    */
-    int getLockCount() const { return fLockCount; }
+    /**
+     *  Returns true if the lockcount > 0
+     */
+    bool isLocked() const { return fLockCount > 0; }
 
     /** Call to access the pixel memory, which is returned. Balance with a call
         to unlockPixels().
@@ -205,6 +206,18 @@
 
     SkPixelRef(SkFlattenableReadBuffer&, SkBaseMutex*);
 
+    // only call from constructor. Flags this to always be locked, removing
+    // the need to grab the mutex and call onLockPixels/onUnlockPixels.
+    // Performance tweak to avoid those calls (esp. in multi-thread use case).
+    void setPreLocked(void* pixels, SkColorTable* ctable);
+
+    // only call from constructor. Specify a (possibly) different mutex, or
+    // null to use the default. Use with caution.
+    // The default logic is to provide a mutex, but possibly one that is
+    // shared with other instances, though this sharing is implementation
+    // specific, and it is legal for each instance to have its own mutex.
+    void useDefaultMutex() { this->setMutex(NULL); }
+
 private:
 #if !SK_ALLOW_STATIC_GLOBAL_INITIALIZERS
     static void InitializeFlattenables();
@@ -221,6 +234,10 @@
 
     // can go from false to true, but never from true to false
     bool    fIsImmutable;
+    // only ever set in constructor, const after that
+    bool    fPreLocked;
+
+    void setMutex(SkBaseMutex* mutex);
 
     friend class SkGraphics;
 };
diff --git a/include/core/SkThread_platform.h b/include/core/SkThread_platform.h
index 863f6e3..58311e1 100644
--- a/include/core/SkThread_platform.h
+++ b/include/core/SkThread_platform.h
@@ -75,6 +75,8 @@
 // Special case used when the static mutex must be available globally.
 #define SK_DECLARE_GLOBAL_MUTEX(name)   SkBaseMutex  name = { PTHREAD_MUTEX_INITIALIZER }
 
+#define SK_DECLARE_MUTEX_ARRAY(name, count)    SkBaseMutex name[count] = { PTHREAD_MUTEX_INITIALIZER }
+
 // A normal mutex that requires to be initialized through normal C++ construction,
 // i.e. when it's a member of another class, or allocated on the heap.
 class SkMutex : public SkBaseMutex, SkNoncopyable {
@@ -106,8 +108,9 @@
 
 typedef SkMutex SkBaseMutex;
 
-#define SK_DECLARE_STATIC_MUTEX(name)  static SkBaseMutex  name
-#define SK_DECLARE_GLOBAL_MUTEX(name)  SkBaseMutex  name
+#define SK_DECLARE_STATIC_MUTEX(name)           static SkBaseMutex  name
+#define SK_DECLARE_GLOBAL_MUTEX(name)           SkBaseMutex  name
+#define SK_DECLARE_MUTEX_ARRAY(name, count)     SkBaseMutex name[count]
 
 #endif // !SK_USE_POSIX_THREADS
 
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp
index 58d0bd8..0b98513 100644
--- a/src/core/SkBitmap.cpp
+++ b/src/core/SkBitmap.cpp
@@ -284,7 +284,7 @@
 void SkBitmap::updatePixelsFromRef() const {
     if (NULL != fPixelRef) {
         if (fPixelLockCount > 0) {
-            SkASSERT(fPixelRef->getLockCount() > 0);
+            SkASSERT(fPixelRef->isLocked());
 
             void* p = fPixelRef->pixels();
             if (NULL != p) {
@@ -1533,7 +1533,7 @@
 #if 0   // these asserts are not thread-correct, so disable for now
     if (fPixelRef) {
         if (fPixelLockCount > 0) {
-            SkASSERT(fPixelRef->getLockCount() > 0);
+            SkASSERT(fPixelRef->isLocked());
         } else {
             SkASSERT(NULL == fPixels);
             SkASSERT(NULL == fColorTable);
diff --git a/src/core/SkBitmapProcShader.cpp b/src/core/SkBitmapProcShader.cpp
index 32b6f9e..6d64716 100644
--- a/src/core/SkBitmapProcShader.cpp
+++ b/src/core/SkBitmapProcShader.cpp
@@ -173,7 +173,7 @@
 
     SkASSERT(state.fBitmap->getPixels());
     SkASSERT(state.fBitmap->pixelRef() == NULL ||
-             state.fBitmap->pixelRef()->getLockCount());
+             state.fBitmap->pixelRef()->isLocked());
 
     for (;;) {
         int n = count;
@@ -217,7 +217,7 @@
 
     SkASSERT(state.fBitmap->getPixels());
     SkASSERT(state.fBitmap->pixelRef() == NULL ||
-             state.fBitmap->pixelRef()->getLockCount());
+             state.fBitmap->pixelRef()->isLocked());
 
     for (;;) {
         int n = count;
diff --git a/src/core/SkMallocPixelRef.cpp b/src/core/SkMallocPixelRef.cpp
index 3e22075..72dbb3d 100644
--- a/src/core/SkMallocPixelRef.cpp
+++ b/src/core/SkMallocPixelRef.cpp
@@ -18,6 +18,8 @@
     fSize = size;
     fCTable = ctable;
     SkSafeRef(ctable);
+    
+    this->setPreLocked(fStorage, fCTable);
 }
 
 SkMallocPixelRef::~SkMallocPixelRef() {
@@ -57,6 +59,8 @@
     } else {
         fCTable = NULL;
     }
+
+    this->setPreLocked(fStorage, fCTable);
 }
 
 SK_DEFINE_PIXEL_REF_REGISTRAR(SkMallocPixelRef)
diff --git a/src/core/SkPixelRef.cpp b/src/core/SkPixelRef.cpp
index d5e1b81..1279f06 100644
--- a/src/core/SkPixelRef.cpp
+++ b/src/core/SkPixelRef.cpp
@@ -9,7 +9,28 @@
 #include "SkFlattenable.h"
 #include "SkThread.h"
 
-SK_DECLARE_STATIC_MUTEX(gPixelRefMutex);
+// must be a power-of-2. undef to just use 1 mutex
+#define PIXELREF_MUTEX_RING_COUNT       32
+
+#ifdef PIXELREF_MUTEX_RING_COUNT
+    static int32_t gPixelRefMutexRingIndex;
+    static SK_DECLARE_MUTEX_ARRAY(gPixelRefMutexRing, PIXELREF_MUTEX_RING_COUNT);
+#else
+    SK_DECLARE_STATIC_MUTEX(gPixelRefMutex);
+#endif
+
+SkBaseMutex* get_default_mutex() {
+#ifdef PIXELREF_MUTEX_RING_COUNT
+    // atomic_inc might be overkill here. It may be fine if once in a while
+    // we hit a race-condition and two subsequent calls get the same index...
+    int index = sk_atomic_inc(&gPixelRefMutexRingIndex);
+    return &gPixelRefMutexRing[index & (PIXELREF_MUTEX_RING_COUNT - 1)];
+#else
+    return &gPixelRefMutex;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
 
 extern int32_t SkNextPixelRefGenerationID();
 int32_t SkNextPixelRefGenerationID() {
@@ -23,29 +44,45 @@
     return genID;
 }
 
+///////////////////////////////////////////////////////////////////////////////
 
-SkPixelRef::SkPixelRef(SkBaseMutex* mutex) {
+void SkPixelRef::setMutex(SkBaseMutex* mutex) {
     if (NULL == mutex) {
-        mutex = &gPixelRefMutex;
+        mutex = get_default_mutex();
     }
     fMutex = mutex;
+}
+
+// just need a > 0 value, so pick a funny one to aid in debugging
+#define SKPIXELREF_PRELOCKED_LOCKCOUNT     123456789
+
+SkPixelRef::SkPixelRef(SkBaseMutex* mutex) : fPreLocked(false) {
+    this->setMutex(mutex);
     fPixels = NULL;
     fColorTable = NULL; // we do not track ownership of this
     fLockCount = 0;
     fGenerationID = 0;  // signal to rebuild
     fIsImmutable = false;
+    fPreLocked = false;
 }
 
 SkPixelRef::SkPixelRef(SkFlattenableReadBuffer& buffer, SkBaseMutex* mutex) {
-    if (NULL == mutex) {
-        mutex = &gPixelRefMutex;
-    }
-    fMutex = mutex;
+    this->setMutex(mutex);
     fPixels = NULL;
     fColorTable = NULL; // we do not track ownership of this
     fLockCount = 0;
     fGenerationID = 0;  // signal to rebuild
     fIsImmutable = buffer.readBool();
+    fPreLocked = false;
+}
+
+void SkPixelRef::setPreLocked(void* pixels, SkColorTable* ctable) {
+    // only call me in your constructor, otherwise fLockCount tracking can get
+    // out of sync.
+    fPixels = pixels;
+    fColorTable = ctable;
+    fLockCount = SKPIXELREF_PRELOCKED_LOCKCOUNT;
+    fPreLocked = true;
 }
 
 void SkPixelRef::flatten(SkFlattenableWriteBuffer& buffer) const {
@@ -53,21 +90,29 @@
 }
 
 void SkPixelRef::lockPixels() {
-    SkAutoMutexAcquire  ac(*fMutex);
+    SkASSERT(!fPreLocked || SKPIXELREF_PRELOCKED_LOCKCOUNT == fLockCount);
 
-    if (1 == ++fLockCount) {
-        fPixels = this->onLockPixels(&fColorTable);
+    if (!fPreLocked) {
+        SkAutoMutexAcquire  ac(*fMutex);
+
+        if (1 == ++fLockCount) {
+            fPixels = this->onLockPixels(&fColorTable);
+        }
     }
 }
 
 void SkPixelRef::unlockPixels() {
-    SkAutoMutexAcquire  ac(*fMutex);
+    SkASSERT(!fPreLocked || SKPIXELREF_PRELOCKED_LOCKCOUNT == fLockCount);
+    
+    if (!fPreLocked) {
+        SkAutoMutexAcquire  ac(*fMutex);
 
-    SkASSERT(fLockCount > 0);
-    if (0 == --fLockCount) {
-        this->onUnlockPixels();
-        fPixels = NULL;
-        fColorTable = NULL;
+        SkASSERT(fLockCount > 0);
+        if (0 == --fLockCount) {
+            this->onUnlockPixels();
+            fPixels = NULL;
+            fColorTable = NULL;
+        }
     }
 }
 
diff --git a/src/images/SkImageRefPool.cpp b/src/images/SkImageRefPool.cpp
index bfa933e..c24dba0 100644
--- a/src/images/SkImageRefPool.cpp
+++ b/src/images/SkImageRefPool.cpp
@@ -59,7 +59,7 @@
     
     while (NULL != ref && fRAMUsed > limit) {
         // only purge it if its pixels are unlocked
-        if (0 == ref->getLockCount() && ref->fBitmap.getPixels()) {
+        if (!ref->isLocked() && ref->fBitmap.getPixels()) {
             size_t size = ref->ramUsed();
             SkASSERT(size <= fRAMUsed);
             fRAMUsed -= size;
@@ -181,10 +181,10 @@
     SkImageRef* ref = fHead;
     
     while (ref != NULL) {
-        SkDebugf("  [%3d %3d %d] ram=%d data=%d locks=%d %s\n", ref->fBitmap.width(),
+        SkDebugf("  [%3d %3d %d] ram=%d data=%d locked=%d %s\n", ref->fBitmap.width(),
                  ref->fBitmap.height(), ref->fBitmap.config(),
                  ref->ramUsed(), (int)ref->fStream->getLength(),
-                 ref->getLockCount(), ref->getURI());
+                 ref->isLocked(), ref->getURI());
         
         ref = ref->fNext;
     }
diff --git a/src/ports/SkImageRef_ashmem.cpp b/src/ports/SkImageRef_ashmem.cpp
index f9c6aff..46ebb0d 100644
--- a/src/ports/SkImageRef_ashmem.cpp
+++ b/src/ports/SkImageRef_ashmem.cpp
@@ -41,6 +41,8 @@
     fRec.fPinned = false;
             
     fCT = NULL;
+            
+    this->useDefaultMutex();   // we don't need/want the shared imageref mutex
 }
 
 SkImageRef_ashmem::~SkImageRef_ashmem() {
@@ -235,6 +237,7 @@
         buffer.read(buf, length);
         setURI(buf, length);
     }
+    this->useDefaultMutex();   // we don't need/want the shared imageref mutex
 }
 
 SkPixelRef* SkImageRef_ashmem::Create(SkFlattenableReadBuffer& buffer) {