merge in jb-release history after reset to jb-dev
diff --git a/include/core/SkPixelRef.h b/include/core/SkPixelRef.h
index d5f6ab2..f01ba15 100644
--- a/include/core/SkPixelRef.h
+++ b/include/core/SkPixelRef.h
@@ -63,9 +63,10 @@
     */
     SkColorTable* colorTable() const { return fColorTable; }
 
-    /** Return the current lockcount (defaults to 0)
-    */
-    int getLockCount() const { return fLockCount; }
+    /**
+     *  Returns true if the lockcount > 0
+     */
+    bool isLocked() const { return fLockCount > 0; }
 
     /** Call to access the pixel memory, which is returned. Balance with a call
         to unlockPixels().
@@ -205,6 +206,18 @@
 
     SkPixelRef(SkFlattenableReadBuffer&, SkBaseMutex*);
 
+    // only call from constructor. Flags this to always be locked, removing
+    // the need to grab the mutex and call onLockPixels/onUnlockPixels.
+    // Performance tweak to avoid those calls (esp. in multi-thread use case).
+    void setPreLocked(void* pixels, SkColorTable* ctable);
+
+    // Only call from the constructor. Discards whichever mutex was handed to
+    // the constructor and switches to the default one. Use with caution.
+    // The default logic is to provide a mutex, but possibly one that is
+    // shared with other instances, though this sharing is implementation
+    // specific, and it is legal for each instance to have its own mutex.
+    void useDefaultMutex() { this->setMutex(NULL); }
+
 private:
 #if !SK_ALLOW_STATIC_GLOBAL_INITIALIZERS
     static void InitializeFlattenables();
@@ -221,6 +234,10 @@
 
     // can go from false to true, but never from true to false
     bool    fIsImmutable;
+    // only ever set in constructor, const after that
+    bool    fPreLocked;
+
+    void setMutex(SkBaseMutex* mutex);
 
     friend class SkGraphics;
 };
diff --git a/include/core/SkThread_platform.h b/include/core/SkThread_platform.h
index 863f6e3..58311e1 100644
--- a/include/core/SkThread_platform.h
+++ b/include/core/SkThread_platform.h
@@ -75,6 +75,8 @@
 // Special case used when the static mutex must be available globally.
 #define SK_DECLARE_GLOBAL_MUTEX(name)   SkBaseMutex  name = { PTHREAD_MUTEX_INITIALIZER }
 
+#define SK_DECLARE_MUTEX_ARRAY(name, count)    SkBaseMutex name[count] = { PTHREAD_MUTEX_INITIALIZER }  // NOTE(review): only name[0] gets the initializer; name[1..] are zero-initialized -- confirm that is a valid unlocked mutex on every POSIX target
+
 // A normal mutex that requires to be initialized through normal C++ construction,
 // i.e. when it's a member of another class, or allocated on the heap.
 class SkMutex : public SkBaseMutex, SkNoncopyable {
@@ -106,8 +108,9 @@
 
 typedef SkMutex SkBaseMutex;
 
-#define SK_DECLARE_STATIC_MUTEX(name)  static SkBaseMutex  name
-#define SK_DECLARE_GLOBAL_MUTEX(name)  SkBaseMutex  name
+#define SK_DECLARE_STATIC_MUTEX(name)           static SkBaseMutex  name
+#define SK_DECLARE_GLOBAL_MUTEX(name)           SkBaseMutex  name
+#define SK_DECLARE_MUTEX_ARRAY(name, count)     SkBaseMutex name[count]
 
 #endif // !SK_USE_POSIX_THREADS
 
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp
index 58d0bd8..0b98513 100644
--- a/src/core/SkBitmap.cpp
+++ b/src/core/SkBitmap.cpp
@@ -284,7 +284,7 @@
 void SkBitmap::updatePixelsFromRef() const {
     if (NULL != fPixelRef) {
         if (fPixelLockCount > 0) {
-            SkASSERT(fPixelRef->getLockCount() > 0);
+            SkASSERT(fPixelRef->isLocked());
 
             void* p = fPixelRef->pixels();
             if (NULL != p) {
@@ -1533,7 +1533,7 @@
 #if 0   // these asserts are not thread-correct, so disable for now
     if (fPixelRef) {
         if (fPixelLockCount > 0) {
-            SkASSERT(fPixelRef->getLockCount() > 0);
+            SkASSERT(fPixelRef->isLocked());
         } else {
             SkASSERT(NULL == fPixels);
             SkASSERT(NULL == fColorTable);
diff --git a/src/core/SkBitmapProcShader.cpp b/src/core/SkBitmapProcShader.cpp
index 32b6f9e..6d64716 100644
--- a/src/core/SkBitmapProcShader.cpp
+++ b/src/core/SkBitmapProcShader.cpp
@@ -173,7 +173,7 @@
 
     SkASSERT(state.fBitmap->getPixels());
     SkASSERT(state.fBitmap->pixelRef() == NULL ||
-             state.fBitmap->pixelRef()->getLockCount());
+             state.fBitmap->pixelRef()->isLocked());
 
     for (;;) {
         int n = count;
@@ -217,7 +217,7 @@
 
     SkASSERT(state.fBitmap->getPixels());
     SkASSERT(state.fBitmap->pixelRef() == NULL ||
-             state.fBitmap->pixelRef()->getLockCount());
+             state.fBitmap->pixelRef()->isLocked());
 
     for (;;) {
         int n = count;
diff --git a/src/core/SkMallocPixelRef.cpp b/src/core/SkMallocPixelRef.cpp
index 3e22075..72dbb3d 100644
--- a/src/core/SkMallocPixelRef.cpp
+++ b/src/core/SkMallocPixelRef.cpp
@@ -18,6 +18,8 @@
     fSize = size;
     fCTable = ctable;
     SkSafeRef(ctable);
+    
+    this->setPreLocked(fStorage, fCTable);
 }
 
 SkMallocPixelRef::~SkMallocPixelRef() {
@@ -57,6 +59,8 @@
     } else {
         fCTable = NULL;
     }
+
+    this->setPreLocked(fStorage, fCTable);
 }
 
 SK_DEFINE_PIXEL_REF_REGISTRAR(SkMallocPixelRef)
diff --git a/src/core/SkPictureRecord.cpp b/src/core/SkPictureRecord.cpp
index ba49b72..fdeec3f 100644
--- a/src/core/SkPictureRecord.cpp
+++ b/src/core/SkPictureRecord.cpp
@@ -319,7 +319,7 @@
 
 void SkPictureRecord::drawText(const void* text, size_t byteLength, SkScalar x,
                       SkScalar y, const SkPaint& paint) {
-    bool fast = paint.canComputeFastBounds();
+    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds();
 
     addDraw(fast ? DRAW_TEXT_TOP_BOTTOM : DRAW_TEXT);
     addPaint(paint);
@@ -356,7 +356,7 @@
         }
     }
 
-    bool fastBounds = paint.canComputeFastBounds();
+    bool fastBounds = !paint.isVerticalText() && paint.canComputeFastBounds();
     bool fast = canUseDrawH && fastBounds;
 
     if (fast) {
@@ -404,7 +404,7 @@
     if (0 == points)
         return;
 
-    bool fast = paint.canComputeFastBounds();
+    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds();
 
     addDraw(fast ? DRAW_POS_TEXT_H_TOP_BOTTOM : DRAW_POS_TEXT_H);
     addPaint(paint);
diff --git a/src/core/SkPixelRef.cpp b/src/core/SkPixelRef.cpp
index d5e1b81..1279f06 100644
--- a/src/core/SkPixelRef.cpp
+++ b/src/core/SkPixelRef.cpp
@@ -9,7 +9,28 @@
 #include "SkFlattenable.h"
 #include "SkThread.h"
 
-SK_DECLARE_STATIC_MUTEX(gPixelRefMutex);
+// must be a power-of-2. undef to just use 1 mutex
+#define PIXELREF_MUTEX_RING_COUNT       32
+
+#ifdef PIXELREF_MUTEX_RING_COUNT
+    static int32_t gPixelRefMutexRingIndex;
+    static SK_DECLARE_MUTEX_ARRAY(gPixelRefMutexRing, PIXELREF_MUTEX_RING_COUNT);
+#else
+    SK_DECLARE_STATIC_MUTEX(gPixelRefMutex);
+#endif
+
+static SkBaseMutex* get_default_mutex() {   // file-local helper, used by SkPixelRef::setMutex()
+#ifdef PIXELREF_MUTEX_RING_COUNT
+    // Hand out the ring's mutexes round-robin. atomic_inc might be overkill:
+    // two racing callers that got the same index would only share a mutex.
+    int index = sk_atomic_inc(&gPixelRefMutexRingIndex);
+    return &gPixelRefMutexRing[index & (PIXELREF_MUTEX_RING_COUNT - 1)];   // mask needs a power-of-2 count
+#else
+    return &gPixelRefMutex;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
 
 extern int32_t SkNextPixelRefGenerationID();
 int32_t SkNextPixelRefGenerationID() {
@@ -23,29 +44,45 @@
     return genID;
 }
 
+///////////////////////////////////////////////////////////////////////////////
 
-SkPixelRef::SkPixelRef(SkBaseMutex* mutex) {
+void SkPixelRef::setMutex(SkBaseMutex* mutex) {
     if (NULL == mutex) {
-        mutex = &gPixelRefMutex;
+        mutex = get_default_mutex();
     }
     fMutex = mutex;
+}
+
+// just need a > 0 value, so pick a funny one to aid in debugging
+#define SKPIXELREF_PRELOCKED_LOCKCOUNT     123456789
+
+SkPixelRef::SkPixelRef(SkBaseMutex* mutex) {
+    this->setMutex(mutex);  // NULL selects the default mutex
     fPixels = NULL;
     fColorTable = NULL; // we do not track ownership of this
     fLockCount = 0;
     fGenerationID = 0;  // signal to rebuild
     fIsImmutable = false;
+    fPreLocked = false;     // stays false unless setPreLocked() is called
 }
 
 SkPixelRef::SkPixelRef(SkFlattenableReadBuffer& buffer, SkBaseMutex* mutex) {
-    if (NULL == mutex) {
-        mutex = &gPixelRefMutex;
-    }
-    fMutex = mutex;
+    this->setMutex(mutex);
     fPixels = NULL;
     fColorTable = NULL; // we do not track ownership of this
     fLockCount = 0;
     fGenerationID = 0;  // signal to rebuild
     fIsImmutable = buffer.readBool();
+    fPreLocked = false;
+}
+
+void SkPixelRef::setPreLocked(void* pixels, SkColorTable* ctable) {
+    // only call me in your constructor, otherwise fLockCount tracking can get
+    // out of sync.
+    fPixels = pixels;
+    fColorTable = ctable;
+    fLockCount = SKPIXELREF_PRELOCKED_LOCKCOUNT;
+    fPreLocked = true;
 }
 
 void SkPixelRef::flatten(SkFlattenableWriteBuffer& buffer) const {
@@ -53,21 +90,29 @@
 }
 
 void SkPixelRef::lockPixels() {
-    SkAutoMutexAcquire  ac(*fMutex);
+    SkASSERT(!fPreLocked || SKPIXELREF_PRELOCKED_LOCKCOUNT == fLockCount);
 
-    if (1 == ++fLockCount) {
-        fPixels = this->onLockPixels(&fColorTable);
+    if (!fPreLocked) {
+        SkAutoMutexAcquire  ac(*fMutex);
+
+        if (1 == ++fLockCount) {
+            fPixels = this->onLockPixels(&fColorTable);
+        }
     }
 }
 
 void SkPixelRef::unlockPixels() {
-    SkAutoMutexAcquire  ac(*fMutex);
+    SkASSERT(!fPreLocked || SKPIXELREF_PRELOCKED_LOCKCOUNT == fLockCount);
+    
+    if (!fPreLocked) {
+        SkAutoMutexAcquire  ac(*fMutex);
 
-    SkASSERT(fLockCount > 0);
-    if (0 == --fLockCount) {
-        this->onUnlockPixels();
-        fPixels = NULL;
-        fColorTable = NULL;
+        SkASSERT(fLockCount > 0);
+        if (0 == --fLockCount) {
+            this->onUnlockPixels();
+            fPixels = NULL;
+            fColorTable = NULL;
+        }
     }
 }
 
diff --git a/src/images/SkImageRefPool.cpp b/src/images/SkImageRefPool.cpp
index bfa933e..c24dba0 100644
--- a/src/images/SkImageRefPool.cpp
+++ b/src/images/SkImageRefPool.cpp
@@ -59,7 +59,7 @@
     
     while (NULL != ref && fRAMUsed > limit) {
         // only purge it if its pixels are unlocked
-        if (0 == ref->getLockCount() && ref->fBitmap.getPixels()) {
+        if (!ref->isLocked() && ref->fBitmap.getPixels()) {
             size_t size = ref->ramUsed();
             SkASSERT(size <= fRAMUsed);
             fRAMUsed -= size;
@@ -181,10 +181,10 @@
     SkImageRef* ref = fHead;
     
     while (ref != NULL) {
-        SkDebugf("  [%3d %3d %d] ram=%d data=%d locks=%d %s\n", ref->fBitmap.width(),
+        SkDebugf("  [%3d %3d %d] ram=%d data=%d locked=%d %s\n", ref->fBitmap.width(),
                  ref->fBitmap.height(), ref->fBitmap.config(),
                  ref->ramUsed(), (int)ref->fStream->getLength(),
-                 ref->getLockCount(), ref->getURI());
+                 ref->isLocked(), ref->getURI());
         
         ref = ref->fNext;
     }
diff --git a/src/ports/SkImageRef_ashmem.cpp b/src/ports/SkImageRef_ashmem.cpp
index f9c6aff..46ebb0d 100644
--- a/src/ports/SkImageRef_ashmem.cpp
+++ b/src/ports/SkImageRef_ashmem.cpp
@@ -41,6 +41,8 @@
     fRec.fPinned = false;
             
     fCT = NULL;
+            
+    this->useDefaultMutex();   // we don't need/want the shared imageref mutex
 }
 
 SkImageRef_ashmem::~SkImageRef_ashmem() {
@@ -235,6 +237,7 @@
         buffer.read(buf, length);
         setURI(buf, length);
     }
+    this->useDefaultMutex();   // we don't need/want the shared imageref mutex
 }
 
 SkPixelRef* SkImageRef_ashmem::Create(SkFlattenableReadBuffer& buffer) {