am 8c1c7cfa: Cherrypick change to skia

* commit '8c1c7cfacd82a174c65fcbf839d7037e3692aee8':
  Cherrypick change to skia
diff --git a/Android.mk b/Android.mk
index f1e81d2..08cf6f8 100644
--- a/Android.mk
+++ b/Android.mk
@@ -47,6 +47,14 @@
 	LOCAL_CFLAGS += -D__ARM_HAVE_NEON
 endif
 
+# special checks for alpha == 0 and alpha == 255 in S32A_Opaque_BlitRow32
+# procedures (C and assembly) seriously improve skia performance
+LOCAL_CFLAGS += -DTEST_SRC_ALPHA
+
+# using freetype's embolden allows us to adjust fake bold settings at
+# draw-time, at which point we know which SkTypeface is being drawn
+LOCAL_CFLAGS += -DSK_USE_FREETYPE_EMBOLDEN
+
 LOCAL_SRC_FILES:= \
 	src/core/Sk64.cpp \
 	src/core/SkAAClip.cpp \
diff --git a/include/core/SkPaint.h b/include/core/SkPaint.h
index 22d003e..0ec6698 100644
--- a/include/core/SkPaint.h
+++ b/include/core/SkPaint.h
@@ -13,6 +13,7 @@
 #include "SkColor.h"
 #include "SkDrawLooper.h"
 #include "SkXfermode.h"
+#include "SkString.h"
 
 class SkAutoGlyphCache;
 class SkColorFilter;
@@ -654,6 +655,18 @@
     */
     void    setTextAlign(Align align);
 
+#ifdef SK_BUILD_FOR_ANDROID
+    /** Return the paint's text locale value.
+        @return the paint's text locale value used for drawing text.
+    */
+    const SkString& getTextLocale() const { return fTextLocale; }
+
+    /** Set the paint's text locale.
+        @param locale set the paint's locale value for drawing text.
+    */
+    void    setTextLocale(const SkString& locale);
+#endif
+
     /** Return the paint's text size.
         @return the paint's text size.
     */
@@ -861,6 +874,9 @@
     /** Returns the base glyph count for the strike associated with this paint
     */
     unsigned getBaseGlyphCount(SkUnichar text) const;
+    
+    int utfToGlyphs(const void* text, TextEncoding encoding,
+            size_t byteLength, uint16_t glyphs[]) const;
 #endif
 
     // returns true if the paint's settings (e.g. xfermode + alpha) resolve to
@@ -892,6 +908,9 @@
     unsigned        fStyle : 2;
     unsigned        fTextEncoding : 2;  // 3 values
     unsigned        fHinting : 2;
+#ifdef SK_BUILD_FOR_ANDROID
+    SkString        fTextLocale;
+#endif
 
     SkDrawCacheProc    getDrawCacheProc() const;
     SkMeasureCacheProc getMeasureCacheProc(TextBufferDirection dir,
diff --git a/include/core/SkPixelRef.h b/include/core/SkPixelRef.h
index d5f6ab2..f01ba15 100644
--- a/include/core/SkPixelRef.h
+++ b/include/core/SkPixelRef.h
@@ -63,9 +63,10 @@
     */
     SkColorTable* colorTable() const { return fColorTable; }
 
-    /** Return the current lockcount (defaults to 0)
-    */
-    int getLockCount() const { return fLockCount; }
+    /**
+     *  Returns true if the lockcount > 0
+     */
+    bool isLocked() const { return fLockCount > 0; }
 
     /** Call to access the pixel memory, which is returned. Balance with a call
         to unlockPixels().
@@ -205,6 +206,18 @@
 
     SkPixelRef(SkFlattenableReadBuffer&, SkBaseMutex*);
 
+    // only call from constructor. Flags this to always be locked, removing
+    // the need to grab the mutex and call onLockPixels/onUnlockPixels.
+    // Performance tweak to avoid those calls (esp. in multi-thread use case).
+    void setPreLocked(void* pixels, SkColorTable* ctable);
+
+    // only call from constructor. Specify a (possibly) different mutex, or
+    // null to use the default. Use with caution.
+    // The default logic is to provide a mutex, but possibly one that is
+    // shared with other instances, though this sharing is implementation
+    // specific, and it is legal for each instance to have its own mutex.
+    void useDefaultMutex() { this->setMutex(NULL); }
+
 private:
 #if !SK_ALLOW_STATIC_GLOBAL_INITIALIZERS
     static void InitializeFlattenables();
@@ -221,6 +234,10 @@
 
     // can go from false to true, but never from true to false
     bool    fIsImmutable;
+    // only ever set in constructor, const after that
+    bool    fPreLocked;
+
+    void setMutex(SkBaseMutex* mutex);
 
     friend class SkGraphics;
 };
diff --git a/include/core/SkThread_platform.h b/include/core/SkThread_platform.h
index 863f6e3..58311e1 100644
--- a/include/core/SkThread_platform.h
+++ b/include/core/SkThread_platform.h
@@ -75,6 +75,8 @@
 // Special case used when the static mutex must be available globally.
 #define SK_DECLARE_GLOBAL_MUTEX(name)   SkBaseMutex  name = { PTHREAD_MUTEX_INITIALIZER }
 
+#define SK_DECLARE_MUTEX_ARRAY(name, count)    SkBaseMutex name[count] = { PTHREAD_MUTEX_INITIALIZER }
+
 // A normal mutex that requires to be initialized through normal C++ construction,
 // i.e. when it's a member of another class, or allocated on the heap.
 class SkMutex : public SkBaseMutex, SkNoncopyable {
@@ -106,8 +108,9 @@
 
 typedef SkMutex SkBaseMutex;
 
-#define SK_DECLARE_STATIC_MUTEX(name)  static SkBaseMutex  name
-#define SK_DECLARE_GLOBAL_MUTEX(name)  SkBaseMutex  name
+#define SK_DECLARE_STATIC_MUTEX(name)           static SkBaseMutex  name
+#define SK_DECLARE_GLOBAL_MUTEX(name)           SkBaseMutex  name
+#define SK_DECLARE_MUTEX_ARRAY(name, count)     SkBaseMutex name[count]
 
 #endif // !SK_USE_POSIX_THREADS
 
diff --git a/include/core/SkUserConfig.h b/include/core/SkUserConfig.h
index ef75114..91f8948 100644
--- a/include/core/SkUserConfig.h
+++ b/include/core/SkUserConfig.h
@@ -122,7 +122,7 @@
  *  To specify a different default font cache limit, define this. If this is
  *  undefined, skia will use a built-in value.
  */
-//#define SK_DEFAULT_FONT_CACHE_LIMIT   (1024 * 1024)
+#define SK_DEFAULT_FONT_CACHE_LIMIT   (768 * 1024)
 
 /* If defined, use CoreText instead of ATSUI on OS X.
 */
diff --git a/include/utils/SkCamera.h b/include/utils/SkCamera.h
index 57521b8..1c4c1fb 100644
--- a/include/utils/SkCamera.h
+++ b/include/utils/SkCamera.h
@@ -152,6 +152,9 @@
 
 #ifdef SK_BUILD_FOR_ANDROID
     void setCameraLocation(SkScalar x, SkScalar y, SkScalar z);
+    SkScalar getCameraLocationX();
+    SkScalar getCameraLocationY();
+    SkScalar getCameraLocationZ();
 #endif
 
     void getMatrix(SkMatrix*) const;
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp
index 58d0bd8..0b98513 100644
--- a/src/core/SkBitmap.cpp
+++ b/src/core/SkBitmap.cpp
@@ -284,7 +284,7 @@
 void SkBitmap::updatePixelsFromRef() const {
     if (NULL != fPixelRef) {
         if (fPixelLockCount > 0) {
-            SkASSERT(fPixelRef->getLockCount() > 0);
+            SkASSERT(fPixelRef->isLocked());
 
             void* p = fPixelRef->pixels();
             if (NULL != p) {
@@ -1533,7 +1533,7 @@
 #if 0   // these asserts are not thread-correct, so disable for now
     if (fPixelRef) {
         if (fPixelLockCount > 0) {
-            SkASSERT(fPixelRef->getLockCount() > 0);
+            SkASSERT(fPixelRef->isLocked());
         } else {
             SkASSERT(NULL == fPixels);
             SkASSERT(NULL == fColorTable);
diff --git a/src/core/SkBitmapProcShader.cpp b/src/core/SkBitmapProcShader.cpp
index 32b6f9e..6d64716 100644
--- a/src/core/SkBitmapProcShader.cpp
+++ b/src/core/SkBitmapProcShader.cpp
@@ -173,7 +173,7 @@
 
     SkASSERT(state.fBitmap->getPixels());
     SkASSERT(state.fBitmap->pixelRef() == NULL ||
-             state.fBitmap->pixelRef()->getLockCount());
+             state.fBitmap->pixelRef()->isLocked());
 
     for (;;) {
         int n = count;
@@ -217,7 +217,7 @@
 
     SkASSERT(state.fBitmap->getPixels());
     SkASSERT(state.fBitmap->pixelRef() == NULL ||
-             state.fBitmap->pixelRef()->getLockCount());
+             state.fBitmap->pixelRef()->isLocked());
 
     for (;;) {
         int n = count;
diff --git a/src/core/SkMallocPixelRef.cpp b/src/core/SkMallocPixelRef.cpp
index 3e22075..72dbb3d 100644
--- a/src/core/SkMallocPixelRef.cpp
+++ b/src/core/SkMallocPixelRef.cpp
@@ -18,6 +18,8 @@
     fSize = size;
     fCTable = ctable;
     SkSafeRef(ctable);
+    
+    this->setPreLocked(fStorage, fCTable);
 }
 
 SkMallocPixelRef::~SkMallocPixelRef() {
@@ -57,6 +59,8 @@
     } else {
         fCTable = NULL;
     }
+
+    this->setPreLocked(fStorage, fCTable);
 }
 
 SK_DEFINE_PIXEL_REF_REGISTRAR(SkMallocPixelRef)
diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp
index b0b855a..e1932a7 100644
--- a/src/core/SkPaint.cpp
+++ b/src/core/SkPaint.cpp
@@ -7,6 +7,8 @@
  */
 
 
+#include <new>
+
 #include "SkPaint.h"
 #include "SkColorFilter.h"
 #include "SkFontHost.h"
@@ -69,6 +71,7 @@
     fTextEncoding = kUTF8_TextEncoding;
     fHinting    = SkPaintDefaults_Hinting;
 #ifdef SK_BUILD_FOR_ANDROID
+    new(&fTextLocale) SkString();
     fGenerationID = 0;
 #endif
 }
@@ -85,6 +88,9 @@
     SkSafeRef(fRasterizer);
     SkSafeRef(fLooper);
     SkSafeRef(fImageFilter);
+#ifdef SK_BUILD_FOR_ANDROID
+    new(&fTextLocale) SkString(src.fTextLocale);
+#endif
 }
 
 SkPaint::~SkPaint() {
@@ -123,10 +129,12 @@
     SkSafeUnref(fImageFilter);
 
 #ifdef SK_BUILD_FOR_ANDROID
+    fTextLocale.~SkString();
     uint32_t oldGenerationID = fGenerationID;
 #endif
     memcpy(this, &src, sizeof(src));
 #ifdef SK_BUILD_FOR_ANDROID
+    new(&fTextLocale) SkString(src.fTextLocale);
     fGenerationID = oldGenerationID + 1;
 #endif
 
@@ -357,6 +365,15 @@
     }
 }
 
+#ifdef SK_BUILD_FOR_ANDROID
+void SkPaint::setTextLocale(const SkString& locale) {
+    if(!fTextLocale.equals(locale)) {  // nothing to do if the locale is unchanged
+        fTextLocale.set(locale);
+        GEN_ID_INC;  // NOTE(review): presumably bumps fGenerationID -- confirm
+    }
+}
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////
 
 SkTypeface* SkPaint::setTypeface(SkTypeface* font) {
@@ -423,6 +440,37 @@
     SkGlyphCache::AttachCache(cache);
     return image;
 }
+
+int SkPaint::utfToGlyphs(const void* textData, TextEncoding encoding,
+                         size_t byteLength, uint16_t glyphs[]) const {
+    // Map text (in the given encoding) to glyph ids; returns the glyph count.
+    SkAutoGlyphCache autoCache(*this, NULL);
+    SkGlyphCache* cache = autoCache.getCache();
+    // Walk [text, stop); glyphs[] is written without any bounds check here.
+    const char* text = (const char*) textData;
+    const char* stop = text + byteLength;
+    uint16_t* gptr = glyphs;
+    // Encodings other than UTF-8/UTF-16 hit SkDEBUGFAIL and write no glyphs.
+    switch (encoding) {
+        case SkPaint::kUTF8_TextEncoding:
+            while (text < stop) {
+                *gptr++ = cache->unicharToGlyph(SkUTF8_NextUnichar(&text));
+            }
+            break;
+        case SkPaint::kUTF16_TextEncoding: {
+            const uint16_t* text16 = (const uint16_t*)text;
+            const uint16_t* stop16 = (const uint16_t*)stop;
+            while (text16 < stop16) {
+                *gptr++ = cache->unicharToGlyph(SkUTF16_NextUnichar(&text16));
+            }
+            break;
+        }
+        default:
+            SkDEBUGFAIL("unknown text encoding");
+    }
+    return gptr - glyphs;
+}
+
 #endif
 
 int SkPaint::textToGlyphs(const void* textData, size_t byteLength,
@@ -1399,7 +1447,8 @@
                               const SkMatrix* deviceMatrix, Rec* rec) {
     SkASSERT(deviceMatrix == NULL || !deviceMatrix->hasPerspective());
 
-    rec->fOrigFontID = SkTypeface::UniqueID(paint.getTypeface());
+    SkTypeface* typeface = paint.getTypeface();
+    rec->fOrigFontID = SkTypeface::UniqueID(typeface);
     rec->fFontID = rec->fOrigFontID;
     rec->fTextSize = paint.getTextSize();
     rec->fPreScaleX = paint.getTextScaleX();
@@ -1420,10 +1469,21 @@
 
     unsigned flags = 0;
 
-    if (paint.isFakeBoldText()) {
 #ifdef SK_USE_FREETYPE_EMBOLDEN
+    // It is possible that the SkTypeface used to draw glyphs has
+    // different properties than the SkTypeface set in the SkPaint.
+    // If we are asked to render bold text with a bold font, and are
+    // forced to fall back to a font with normal weight for some
+    // glyphs, we need to use fake bold to render those glyphs. In
+    // order to do that, we set SkScalerContext's "embolden" flag
+    // here if we are trying to draw bold text via any means, and
+    // ignore it at the glyph outline generation stage if the font
+    // actually being used is already bold.
+    if (paint.isFakeBoldText() || (typeface && typeface->isBold())) {
         flags |= SkScalerContext::kEmbolden_Flag;
+    }
 #else
+    if (paint.isFakeBoldText()) {
         SkScalar fakeBoldScale = SkScalarInterpFunc(paint.getTextSize(),
                                                     kStdFakeBoldInterpKeys,
                                                     kStdFakeBoldInterpValues,
@@ -1436,8 +1496,8 @@
         } else {
             strokeWidth += extra;
         }
-#endif
     }
+#endif
 
     if (paint.isDevKernText()) {
         flags |= SkScalerContext::kDevKernText_Flag;
@@ -2018,9 +2078,12 @@
     }
 
     // can't use our canonical size if we need to apply patheffects/strokes
-    if (fPaint.isLinearText() && !applyStrokeAndPathEffects) {
+    if (fPaint.getPathEffect() == NULL) {
         fPaint.setTextSize(SkIntToScalar(SkPaint::kCanonicalTextSizeForPaths));
         fScale = paint.getTextSize() / SkPaint::kCanonicalTextSizeForPaths;
+        if (has_thick_frame(fPaint)) {
+            fPaint.setStrokeWidth(SkScalarDiv(fPaint.getStrokeWidth(), fScale));
+        }
     } else {
         fScale = SK_Scalar1;
     }
diff --git a/src/core/SkPictureRecord.cpp b/src/core/SkPictureRecord.cpp
index ba49b72..fdeec3f 100644
--- a/src/core/SkPictureRecord.cpp
+++ b/src/core/SkPictureRecord.cpp
@@ -319,7 +319,7 @@
 
 void SkPictureRecord::drawText(const void* text, size_t byteLength, SkScalar x,
                       SkScalar y, const SkPaint& paint) {
-    bool fast = paint.canComputeFastBounds();
+    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds();
 
     addDraw(fast ? DRAW_TEXT_TOP_BOTTOM : DRAW_TEXT);
     addPaint(paint);
@@ -356,7 +356,7 @@
         }
     }
 
-    bool fastBounds = paint.canComputeFastBounds();
+    bool fastBounds = !paint.isVerticalText() && paint.canComputeFastBounds();
     bool fast = canUseDrawH && fastBounds;
 
     if (fast) {
@@ -404,7 +404,7 @@
     if (0 == points)
         return;
 
-    bool fast = paint.canComputeFastBounds();
+    bool fast = !paint.isVerticalText() && paint.canComputeFastBounds();
 
     addDraw(fast ? DRAW_POS_TEXT_H_TOP_BOTTOM : DRAW_POS_TEXT_H);
     addPaint(paint);
diff --git a/src/core/SkPixelRef.cpp b/src/core/SkPixelRef.cpp
index d5e1b81..1279f06 100644
--- a/src/core/SkPixelRef.cpp
+++ b/src/core/SkPixelRef.cpp
@@ -9,7 +9,28 @@
 #include "SkFlattenable.h"
 #include "SkThread.h"
 
-SK_DECLARE_STATIC_MUTEX(gPixelRefMutex);
+// must be a power-of-2. undef to just use 1 mutex
+#define PIXELREF_MUTEX_RING_COUNT       32
+
+#ifdef PIXELREF_MUTEX_RING_COUNT
+    static int32_t gPixelRefMutexRingIndex;
+    static SK_DECLARE_MUTEX_ARRAY(gPixelRefMutexRing, PIXELREF_MUTEX_RING_COUNT);
+#else
+    SK_DECLARE_STATIC_MUTEX(gPixelRefMutex);
+#endif
+
+SkBaseMutex* get_default_mutex() {
+#ifdef PIXELREF_MUTEX_RING_COUNT
+    // Hand out ring slots in rotation. atomic_inc might be overkill here: it
+    // may be fine if a race occasionally gives two calls the same index.
+    int index = sk_atomic_inc(&gPixelRefMutexRingIndex);
+    return &gPixelRefMutexRing[index & (PIXELREF_MUTEX_RING_COUNT - 1)];
+#else
+    return &gPixelRefMutex;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
 
 extern int32_t SkNextPixelRefGenerationID();
 int32_t SkNextPixelRefGenerationID() {
@@ -23,29 +44,45 @@
     return genID;
 }
 
+///////////////////////////////////////////////////////////////////////////////
 
-SkPixelRef::SkPixelRef(SkBaseMutex* mutex) {
+void SkPixelRef::setMutex(SkBaseMutex* mutex) {
     if (NULL == mutex) {
-        mutex = &gPixelRefMutex;
+        mutex = get_default_mutex();
     }
     fMutex = mutex;
+}
+
+// just need a > 0 value, so pick a funny one to aid in debugging
+#define SKPIXELREF_PRELOCKED_LOCKCOUNT     123456789
+
+SkPixelRef::SkPixelRef(SkBaseMutex* mutex) : fPreLocked(false) {
+    this->setMutex(mutex);
     fPixels = NULL;
     fColorTable = NULL; // we do not track ownership of this
     fLockCount = 0;
     fGenerationID = 0;  // signal to rebuild
     fIsImmutable = false;
+    fPreLocked = false;
 }
 
 SkPixelRef::SkPixelRef(SkFlattenableReadBuffer& buffer, SkBaseMutex* mutex) {
-    if (NULL == mutex) {
-        mutex = &gPixelRefMutex;
-    }
-    fMutex = mutex;
+    this->setMutex(mutex);
     fPixels = NULL;
     fColorTable = NULL; // we do not track ownership of this
     fLockCount = 0;
     fGenerationID = 0;  // signal to rebuild
     fIsImmutable = buffer.readBool();
+    fPreLocked = false;
+}
+
+void SkPixelRef::setPreLocked(void* pixels, SkColorTable* ctable) {
+    // Only call this from your constructor; calling it later can leave
+    // fLockCount tracking out of sync.
+    fPixels = pixels;
+    fColorTable = ctable;
+    fLockCount = SKPIXELREF_PRELOCKED_LOCKCOUNT;  // sentinel; only needs to be > 0
+    fPreLocked = true;  // lockPixels()/unlockPixels() skip the mutex when set
+}
 
 void SkPixelRef::flatten(SkFlattenableWriteBuffer& buffer) const {
@@ -53,21 +90,29 @@
 }
 
 void SkPixelRef::lockPixels() {
-    SkAutoMutexAcquire  ac(*fMutex);
+    SkASSERT(!fPreLocked || SKPIXELREF_PRELOCKED_LOCKCOUNT == fLockCount);
 
-    if (1 == ++fLockCount) {
-        fPixels = this->onLockPixels(&fColorTable);
+    if (!fPreLocked) {
+        SkAutoMutexAcquire  ac(*fMutex);
+
+        if (1 == ++fLockCount) {
+            fPixels = this->onLockPixels(&fColorTable);
+        }
     }
 }
 
 void SkPixelRef::unlockPixels() {
-    SkAutoMutexAcquire  ac(*fMutex);
+    SkASSERT(!fPreLocked || SKPIXELREF_PRELOCKED_LOCKCOUNT == fLockCount);
+    
+    if (!fPreLocked) {
+        SkAutoMutexAcquire  ac(*fMutex);
 
-    SkASSERT(fLockCount > 0);
-    if (0 == --fLockCount) {
-        this->onUnlockPixels();
-        fPixels = NULL;
-        fColorTable = NULL;
+        SkASSERT(fLockCount > 0);
+        if (0 == --fLockCount) {
+            this->onUnlockPixels();
+            fPixels = NULL;
+            fColorTable = NULL;
+        }
     }
 }
 
diff --git a/src/core/SkScan_AntiPath.cpp b/src/core/SkScan_AntiPath.cpp
index e80ad3e..0834caa 100644
--- a/src/core/SkScan_AntiPath.cpp
+++ b/src/core/SkScan_AntiPath.cpp
@@ -392,10 +392,12 @@
         return false;
 #endif
         int width = bounds.width();
-        int rb = SkAlign4(width);
+        int64_t rb = SkAlign4(width);
+        // use 64bits to detect overflow
+        int64_t storage = rb * bounds.height();
 
         return (width <= MaskSuperBlitter::kMAX_WIDTH) &&
-        (rb * bounds.height() <= MaskSuperBlitter::kMAX_STORAGE);
+               (storage <= MaskSuperBlitter::kMAX_STORAGE);
     }
 
 private:
@@ -579,6 +581,29 @@
             r.fRight < max && r.fBottom < max;
 }
 
+// Non-zero iff value, once left-shifted by shift, no longer fits in a short.
+static int overflows_short_shift(int value, int shift) {
+    const int s = 16 + shift;
+    // Shift as unsigned: left-shifting a negative or overflowing int is UB.
+    return ((int32_t)((uint32_t)value << s) >> s) - value;
+}
+
+// Would any of the coordinates of this rectangle not fit in a short,
+// when left-shifted by shift? Returns non-zero if so.
+static int rect_overflows_short_shift(SkIRect rect, int shift) {
+    SkASSERT(!overflows_short_shift(8191, SHIFT));
+    SkASSERT(overflows_short_shift(8192, SHIFT));
+    SkASSERT(!overflows_short_shift(32767, 0));
+    SkASSERT(overflows_short_shift(32768, 0));
+
+    // Use the caller's shift rather than the file-level SHIFT. We expect no
+    // overflow, so bit-or the results together for a tiny bit of speed.
+    return overflows_short_shift(rect.fLeft, shift) |
+           overflows_short_shift(rect.fRight, shift) |
+           overflows_short_shift(rect.fTop, shift) |
+           overflows_short_shift(rect.fBottom, shift);
+}
+
 static bool safeRoundOut(const SkRect& src, SkIRect* dst, int32_t maxInt) {
 #ifdef SK_SCALAR_IS_FIXED
     // the max-int (shifted) is exactly what we want to compare against, to know
@@ -616,12 +641,30 @@
         return;
     }
 
+    // If the intersection of the path bounds and the clip bounds
+    // will overflow 32767 when << by SHIFT, we can't supersample,
+    // so draw without antialiasing.
+    SkIRect clippedIR;
+    if (path.isInverseFillType()) {
+       // If the path is an inverse fill, it's going to fill the entire
+       // clip, and we care whether the entire clip exceeds our limits.
+       clippedIR = origClip.getBounds();
+    } else {
+       if (!clippedIR.intersect(ir, origClip.getBounds())) {
+           return;
+       }
+    }
+    if (rect_overflows_short_shift(clippedIR, SHIFT)) {
+        SkScan::FillPath(path, origClip, blitter);
+        return;
+    }
+
     // Our antialiasing can't handle a clip larger than 32767, so we restrict
     // the clip to that limit here. (the runs[] uses int16_t for its index).
     //
-    // A more general solution (one that could also eliminate the need to disable
-    // aa based on ir bounds (see overflows_short_shift) would be to tile the
-    // clip/target...
+    // A more general solution (one that could also eliminate the need to
+    // disable aa based on ir bounds (see overflows_short_shift) would be
+    // to tile the clip/target...
     SkRegion tmpClipStorage;
     const SkRegion* clipRgn = &origClip;
     {
diff --git a/src/images/SkImageDecoder.cpp b/src/images/SkImageDecoder.cpp
index de0afbb..c4eac9c 100644
--- a/src/images/SkImageDecoder.cpp
+++ b/src/images/SkImageDecoder.cpp
@@ -171,19 +171,14 @@
 
 bool SkImageDecoder::decodeRegion(SkBitmap* bm, SkIRect rect,
                                   SkBitmap::Config pref) {
-    // pass a temporary bitmap, so that if we return false, we are assured of
-    // leaving the caller's bitmap untouched.
-    SkBitmap    tmp;
-
     // we reset this to false before calling onDecodeRegion
     fShouldCancelDecode = false;
     // assign this, for use by getPrefConfig(), in case fUsePrefTable is false
     fDefaultPref = pref;
 
-    if (!this->onDecodeRegion(&tmp, rect)) {
+    if (!this->onDecodeRegion(bm, rect)) {
         return false;
     }
-    bm->swap(tmp);
     return true;
 }
 
@@ -200,25 +195,30 @@
                                     int width, int height, int srcX, int srcY) {
     int w = width / sampleSize;
     int h = height / sampleSize;
-    if (w == src->width() && h == src->height() &&
-          (srcX - destX) / sampleSize == 0 && (srcY - destY) / sampleSize == 0) {
-        // The output rect is the same as the decode result
-        dest->swap(*src);
-        return;
-    }
-    dest->setConfig(src->getConfig(), w, h);
-    dest->setIsOpaque(src->isOpaque());
+    // if the destination has no pixels then we must allocate them.
+    if (dest->isNull()) {
+        dest->setConfig(src->getConfig(), w, h);
+        dest->setIsOpaque(src->isOpaque());
 
-    if (!this->allocPixelRef(dest, NULL)) {
-#ifdef SK_DEBUG
-        SkDebugf("failed to allocate pixels needed to crop the bitmap");
-#endif
-        return;
+        if (!this->allocPixelRef(dest, NULL)) {
+            SkDEBUGF(("failed to allocate pixels needed to crop the bitmap"));
+            return;
+        }
     }
+    // check to see if the destination is large enough to decode the desired
+    // region. If this assert fails we will just draw as much of the source
+    // into the destination as we can.
+    SkASSERT(dest->width() >= w && dest->height() >= h);
+
+    // Set the Src_Mode for the paint to prevent transparency issue in the
+    // dest in the event that the dest was being re-used.
+    SkPaint paint;
+    paint.setXfermodeMode(SkXfermode::kSrc_Mode);
 
     SkCanvas canvas(*dest);
-    canvas.drawBitmap(*src, (srcX - destX) / sampleSize,
-                             (srcY - destY) / sampleSize);
+    canvas.drawSprite(*src, (srcX - destX) / sampleSize,
+                            (srcY - destY) / sampleSize,
+                            &paint);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/src/images/SkImageDecoder_libbmp.cpp b/src/images/SkImageDecoder_libbmp.cpp
index b5e49e8..6f82def 100644
--- a/src/images/SkImageDecoder_libbmp.cpp
+++ b/src/images/SkImageDecoder_libbmp.cpp
@@ -111,11 +111,19 @@
 
     SkScaledBitmapSampler sampler(width, height, getSampleSize());
 
-    bm->setConfig(config, sampler.scaledWidth(), sampler.scaledHeight());
-    bm->setIsOpaque(true);
     if (justBounds) {
+        bm->setConfig(config, sampler.scaledWidth(), sampler.scaledHeight());
+        bm->setIsOpaque(true);
         return true;
     }
+#ifdef SK_BUILD_FOR_ANDROID
+    // No Bitmap reuse supported for this format
+    if (!bm->isNull()) {
+        return false;
+    }
+#endif
+    bm->setConfig(config, sampler.scaledWidth(), sampler.scaledHeight());
+    bm->setIsOpaque(true);
 
     if (!this->allocPixelRef(bm, NULL)) {
         return false;
diff --git a/src/images/SkImageDecoder_libgif.cpp b/src/images/SkImageDecoder_libgif.cpp
index 7a451a0..2975478 100644
--- a/src/images/SkImageDecoder_libgif.cpp
+++ b/src/images/SkImageDecoder_libgif.cpp
@@ -191,10 +191,18 @@
                 return error_return(gif, *bm, "chooseFromOneChoice");
             }
             
-            bm->setConfig(SkBitmap::kIndex8_Config, width, height);
-            if (SkImageDecoder::kDecodeBounds_Mode == mode)
+            if (SkImageDecoder::kDecodeBounds_Mode == mode) {
+                bm->setConfig(SkBitmap::kIndex8_Config, width, height);
                 return true;
+            }
+#ifdef SK_BUILD_FOR_ANDROID
+            // No Bitmap reuse supported for this format
+            if (!bm->isNull()) {
+                return false;
+            }
+#endif
 
+            bm->setConfig(SkBitmap::kIndex8_Config, width, height);
             SavedImage* image = &gif->SavedImages[gif->ImageCount-1];
             const GifImageDesc& desc = image->ImageDesc;
             
diff --git a/src/images/SkImageDecoder_libico.cpp b/src/images/SkImageDecoder_libico.cpp
index bb6bc95..25a5078 100644
--- a/src/images/SkImageDecoder_libico.cpp
+++ b/src/images/SkImageDecoder_libico.cpp
@@ -234,12 +234,18 @@
     //if the andbitmap (mask) is all zeroes, then we can easily do an index bitmap
     //however, with small images with large colortables, maybe it's better to still do argb_8888
 
-    bm->setConfig(SkBitmap::kARGB_8888_Config, w, h, calculateRowBytesFor8888(w, bitCount));
-    
     if (SkImageDecoder::kDecodeBounds_Mode == mode) {
+        bm->setConfig(SkBitmap::kARGB_8888_Config, w, h, calculateRowBytesFor8888(w, bitCount));
         delete[] colors;
         return true;
     }
+#ifdef SK_BUILD_FOR_ANDROID
+    // No Bitmap reuse supported for this format
+    if (!bm->isNull()) {
+        return false;
+    }
+#endif
+    bm->setConfig(SkBitmap::kARGB_8888_Config, w, h, calculateRowBytesFor8888(w, bitCount));
 
     if (!this->allocPixelRef(bm, NULL))
     {
diff --git a/src/images/SkImageDecoder_libjpeg.cpp b/src/images/SkImageDecoder_libjpeg.cpp
index 8d87450..fbb6887 100644
--- a/src/images/SkImageDecoder_libjpeg.cpp
+++ b/src/images/SkImageDecoder_libjpeg.cpp
@@ -77,6 +77,8 @@
     virtual bool onDecode(SkStream* stream, SkBitmap* bm, Mode);
 private:
     SkJPEGImageIndex *index;
+    int imageWidth;
+    int imageHeight;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -528,7 +530,8 @@
     index->cinfo = cinfo;
     *height = cinfo->output_height;
     *width = cinfo->output_width;
-
+    this->imageWidth = *width;
+    this->imageHeight = *height;
     this->index = index;
     return true;
 }
@@ -537,11 +540,14 @@
     if (index == NULL) {
         return false;
     }
-    int startX = region.fLeft;
-    int startY = region.fTop;
-    int width = region.width();
-    int height = region.height();
     jpeg_decompress_struct *cinfo = index->cinfo;
+
+    SkIRect rect = SkIRect::MakeWH(this->imageWidth, this->imageHeight);
+    if (!rect.intersect(region)) {
+        // If the requested region lies entirely outside the image, just
+        // return false.
+        return false;
+    }
     SkAutoMalloc  srcStorage;
     skjpeg_error_mgr        sk_err;
     cinfo->err = jpeg_std_error(&sk_err);
@@ -579,11 +585,11 @@
         }
     }
 #endif
+    int startX = rect.fLeft;
+    int startY = rect.fTop;
+    int width = rect.width();
+    int height = rect.height();
 
-    int oriStartX = startX;
-    int oriStartY = startY;
-    int oriWidth = width;
-    int oriHeight = height;
     jpeg_init_read_tile_scanline(cinfo, index->index,
                                  &startX, &startY, &width, &height);
     int skiaSampleSize = recompute_sampleSize(requestedSampleSize, *cinfo);
@@ -604,9 +610,30 @@
     {
         bitmap->setConfig(config, cinfo->output_width, height);
         bitmap->setIsOpaque(true);
-        if (!this->allocPixelRef(bitmap, NULL)) {
-            return return_false(*cinfo, *bitmap, "allocPixelRef");
+
+        // Check ahead of time if the swap(dest, src) is possible or not.
+        // If yes, then we will stick to AllocPixelRef since it's cheaper
+        // with the swap happening. If no, then we will use alloc to allocate
+        // pixels to prevent garbage collection.
+        //
+        // Not using a recycled-bitmap and the output rect is same as the
+        // decoded region.
+        int w = rect.width() / actualSampleSize;
+        int h = rect.height() / actualSampleSize;
+        bool swapOnly = (rect == region) && bm->isNull() &&
+                        (w == bitmap->width()) && (h == bitmap->height()) &&
+                        ((startX - rect.x()) / actualSampleSize == 0) &&
+                        ((startY - rect.y()) / actualSampleSize == 0);
+        if (swapOnly) {
+            if (!this->allocPixelRef(bitmap, NULL)) {
+                return return_false(*cinfo, *bitmap, "allocPixelRef");
+            }
+        } else {
+            if (!bitmap->allocPixels()) {
+                return return_false(*cinfo, *bitmap, "allocPixels");
+            }
         }
+
         SkAutoLockPixels alp(*bitmap);
         JSAMPLE* rowptr = (JSAMPLE*)bitmap->getPixels();
         INT32 const bpr = bitmap->rowBytes();
@@ -626,8 +653,13 @@
             row_total_count += row_count;
             rowptr += bpr;
         }
-        cropBitmap(bm, bitmap, actualSampleSize, oriStartX, oriStartY,
-                   oriWidth, oriHeight, startX, startY);
+
+        if (swapOnly) {
+            bm->swap(*bitmap);
+        } else {
+            cropBitmap(bm, bitmap, actualSampleSize, region.x(), region.y(),
+                       region.width(), region.height(), startX, startY);
+        }
         return true;
     }
 #endif
@@ -653,8 +685,24 @@
     bitmap->setConfig(config, sampler.scaledWidth(), sampler.scaledHeight());
     bitmap->setIsOpaque(true);
 
-    if (!this->allocPixelRef(bitmap, NULL)) {
-        return return_false(*cinfo, *bitmap, "allocPixelRef");
+    // Check ahead of time if the swap(dest, src) is possible or not.
+    // If yes, then we will stick to AllocPixelRef since it's cheaper with the
+    // swap happening. If no, then we will use alloc to allocate pixels to
+    // prevent garbage collection.
+    int w = rect.width() / actualSampleSize;
+    int h = rect.height() / actualSampleSize;
+    bool swapOnly = (rect == region) && bm->isNull() &&
+                    (w == bitmap->width()) && (h == bitmap->height()) &&
+                    ((startX - rect.x()) / actualSampleSize == 0) &&
+                    ((startY - rect.y()) / actualSampleSize == 0);
+    if (swapOnly) {
+        if (!this->allocPixelRef(bitmap, NULL)) {
+            return return_false(*cinfo, *bitmap, "allocPixelRef");
+        }
+    } else {
+        if (!bitmap->allocPixels()) {
+            return return_false(*cinfo, *bitmap, "allocPixels");
+        }
     }
 
     SkAutoLockPixels alp(*bitmap);
@@ -691,8 +739,12 @@
             return return_false(*cinfo, *bitmap, "skip rows");
         }
     }
-    cropBitmap(bm, bitmap, actualSampleSize, oriStartX, oriStartY,
-               oriWidth, oriHeight, startX, startY);
+    if (swapOnly) {
+        bm->swap(*bitmap);
+    } else {
+        cropBitmap(bm, bitmap, actualSampleSize, region.x(), region.y(),
+                   region.width(), region.height(), startX, startY);
+    }
     return true;
 }
 
diff --git a/src/images/SkImageDecoder_libpng.cpp b/src/images/SkImageDecoder_libpng.cpp
index 138c28c..fa35239 100644
--- a/src/images/SkImageDecoder_libpng.cpp
+++ b/src/images/SkImageDecoder_libpng.cpp
@@ -58,7 +58,7 @@
 protected:
     virtual bool onBuildTileIndex(SkStream *stream,
              int *width, int *height);
-    virtual bool onDecodeRegion(SkBitmap* bitmap, SkIRect rect);
+    virtual bool onDecodeRegion(SkBitmap* bitmap, SkIRect region);
     virtual bool onDecode(SkStream* stream, SkBitmap* bm, Mode);
 
 private:
@@ -616,7 +616,7 @@
     return true;
 }
 
-bool SkPNGImageDecoder::onDecodeRegion(SkBitmap* bm, SkIRect rect) {
+bool SkPNGImageDecoder::onDecodeRegion(SkBitmap* bm, SkIRect region) {
     int i;
     png_structp png_ptr = this->index->png_ptr;
     png_infop info_ptr = this->index->info_ptr;
@@ -624,14 +624,19 @@
         return false;
     }
 
-    int requestedHeight = rect.fBottom - rect.fTop;
-    int requestedWidth = rect.fRight - rect.fLeft;
-
     png_uint_32 origWidth, origHeight;
     int bit_depth, color_type, interlace_type;
     png_get_IHDR(png_ptr, info_ptr, &origWidth, &origHeight, &bit_depth,
             &color_type, &interlace_type, int_p_NULL, int_p_NULL);
 
+    SkIRect rect = SkIRect::MakeWH(origWidth, origHeight);
+
+    if (!rect.intersect(region)) {
+        // If the requested region lies entirely outside the image, just
+        // return false.
+        return false;
+    }
+
     SkBitmap::Config    config;
     bool                hasAlpha = false;
     bool                doDither = this->getDitherImage();
@@ -643,7 +648,7 @@
     }
 
     const int sampleSize = this->getSampleSize();
-    SkScaledBitmapSampler sampler(origWidth, requestedHeight, sampleSize);
+    SkScaledBitmapSampler sampler(origWidth, rect.height(), sampleSize);
 
     SkBitmap *decodedBitmap = new SkBitmap;
     SkAutoTDelete<SkBitmap> adb(decodedBitmap);
@@ -666,12 +671,25 @@
 
     SkAutoUnref aur(colorTable);
 
-    if (!this->allocPixelRef(decodedBitmap,
-                             SkBitmap::kIndex8_Config == config ?
-                                colorTable : NULL)) {
-        return false;
+    // Check ahead of time whether the decoded bitmap can be swapped into bm instead of cropped.
+    // If it can, keep using allocPixelRef(), which is cheaper when the result is handed over by swap.
+    // If not, allocate the temporary with allocPixels() so its pixels are not garbage collected.
+    int w = rect.width() / sampleSize;
+    int h = rect.height() / sampleSize;
+    bool swapOnly = (rect == region) && (w == decodedBitmap->width()) &&
+                    (h == decodedBitmap->height()) &&
+                    ((0 - rect.x()) / sampleSize == 0) && bm->isNull();
+    if (swapOnly) {
+        if (!this->allocPixelRef(decodedBitmap,
+                SkBitmap::kIndex8_Config == config ? colorTable : NULL)) {
+            return false;
+        }
+    } else {
+        if (!decodedBitmap->allocPixels(
+            NULL, SkBitmap::kIndex8_Config == config ? colorTable : NULL)) {
+            return false;
+        }
     }
-
     SkAutoLockPixels alp(*decodedBitmap);
 
     /* Add filler (or alpha) byte (before/after each RGB triplet) */
@@ -693,8 +711,6 @@
     png_ptr->pass = 0;
     png_read_update_info(png_ptr, info_ptr);
 
-    // SkDebugf("Request size %d %d\n", requestedWidth, requestedHeight);
-
     int actualTop = rect.fTop;
 
     if (SkBitmap::kIndex8_Config == config && 1 == sampleSize) {
@@ -744,7 +760,7 @@
                     png_read_rows(png_ptr, &bmRow, png_bytepp_NULL, 1);
                 }
                 uint8_t* row = base;
-                for (png_uint_32 y = 0; y < requestedHeight; y++) {
+                for (png_uint_32 y = 0; y < rect.height(); y++) {
                     uint8_t* bmRow = row;
                     png_read_rows(png_ptr, &bmRow, png_bytepp_NULL, 1);
                     row += rb;
@@ -777,8 +793,12 @@
             }
         }
     }
-    cropBitmap(bm, decodedBitmap, sampleSize, rect.fLeft, rect.fTop,
-                requestedWidth, requestedHeight, 0, rect.fTop);
+    if (swapOnly) {
+        bm->swap(*decodedBitmap);
+    } else {
+        cropBitmap(bm, decodedBitmap, sampleSize, region.x(), region.y(),
+                   region.width(), region.height(), 0, rect.y());
+    }
 
     if (0 != theTranspColor) {
         reallyHasAlpha |= substituteTranspColor(decodedBitmap, theTranspColor);
diff --git a/src/images/SkImageDecoder_libwebp.cpp b/src/images/SkImageDecoder_libwebp.cpp
index 984331c..3e416cc 100644
--- a/src/images/SkImageDecoder_libwebp.cpp
+++ b/src/images/SkImageDecoder_libwebp.cpp
@@ -21,6 +21,7 @@
 #include "SkStream.h"
 #include "SkTemplates.h"
 #include "SkUtils.h"
+#include "SkTScopedPtr.h"
 
 // A WebP decoder only, on top of (subset of) libwebp
 // For more information on WebP image format, and libwebp library, see:
@@ -151,7 +152,7 @@
 // Incremental WebP image decoding. Reads input buffer of 64K size iteratively
 // and decodes this block to appropriate color-space as per config object.
 static bool webp_idecode(SkStream* stream, WebPDecoderConfig& config) {
-    WebPIDecoder* idec = WebPIDecode(NULL, NULL, &config);
+    WebPIDecoder* idec = WebPIDecode(NULL, 0, &config);
     if (idec == NULL) {
         WebPFreeDecBuffer(&config.output);
         return false;
@@ -309,33 +310,82 @@
     return true;
 }
 
+// Returns true if the bitmap's config is 4444, 565 or 8888.
+static bool isConfigCompatiable(SkBitmap* bitmap) {
+    const SkBitmap::Config cfg = bitmap->config();
+    return SkBitmap::kARGB_8888_Config == cfg || SkBitmap::kRGB_565_Config == cfg ||
+           SkBitmap::kARGB_4444_Config == cfg;
+}
+
 bool SkWEBPImageDecoder::onDecodeRegion(SkBitmap* decodedBitmap,
                                         SkIRect region) {
-    const int width = region.width();
-    const int height = region.height();
+    SkIRect rect = SkIRect::MakeWH(origWidth, origHeight);  // full image bounds
 
-    const int sampleSize = this->getSampleSize();
-    SkScaledBitmapSampler sampler(width, height, sampleSize);
-
-    if (!setDecodeConfig(decodedBitmap, sampler.scaledWidth(),
-                         sampler.scaledHeight())) {
+    if (!rect.intersect(region)) {
+        // If the requested region lies entirely outside the image, just
+        // return false.
         return false;
     }
 
-    if (!this->allocPixelRef(decodedBitmap, NULL)) {
-        return return_false(*decodedBitmap, "allocPixelRef");
+    const int sampleSize = this->getSampleSize();
+    SkScaledBitmapSampler sampler(rect.width(), rect.height(), sampleSize);
+    const int width = sampler.scaledWidth();    // sampled output width
+    const int height = sampler.scaledHeight();  // sampled output height
+
+    // The image can be decoded directly into decodedBitmap if
+    //   1. the requested region lies entirely within the image, and
+    //   2. decodedBitmap is either empty, or already has a compatible config
+    //      and exactly the size of the (sampled) region.
+    bool directDecode = (rect == region) &&
+                        (decodedBitmap->isNull() ||
+                         isConfigCompatiable(decodedBitmap) &&
+                         (decodedBitmap->width() == width) &&
+                         (decodedBitmap->height() == height));
+    SkTScopedPtr<SkBitmap> adb;
+    SkBitmap *bitmap = decodedBitmap;  // decode target; replaced by a temp below if needed
+
+    if (!directDecode) {
+        // Decode into a temporary bitmap (owned by adb) and crop afterwards.
+        bitmap = new SkBitmap;
+        adb.reset(bitmap);
     }
 
-    SkAutoLockPixels alp(*decodedBitmap);
+    if (bitmap->isNull()) {
+        if (!setDecodeConfig(bitmap, width, height)) {
+            return false;
+        }
+        // A temp bitmap is allocated with allocPixels() (native heap, per the
+        // original note: prevents GC); the caller's bitmap uses allocPixelRef().
+        bool allocResult = (bitmap == decodedBitmap)
+                               ? allocPixelRef(bitmap, NULL)
+                               : bitmap->allocPixels();
+        if (!allocResult) {
+            return return_false(*decodedBitmap, "allocPixelRef");
+        }
+    } else {
+        // Reusing a caller-provided bitmap: validate its config here.
+        // setDecodeConfig makes the same call in the isNull() branch above.
+        if (!chooseFromOneChoice(bitmap->config(), width, height)) {
+            return false;
+        }
+    }
 
+    SkAutoLockPixels alp(*bitmap);
     WebPDecoderConfig config;
-    if (!webp_get_config_resize_crop(config, decodedBitmap, region)) {
+    if (!webp_get_config_resize_crop(config, bitmap, rect)) {
         return false;
     }
 
     // Decode the WebP image data stream using WebP incremental decoding for
     // the specified cropped image-region.
-    return webp_idecode(this->inputStream, config);
+    if (!webp_idecode(this->inputStream, config)) {
+        return false;
+    }
+
+    if (!directDecode) {  // result is in the temp bitmap: hand it to cropBitmap
+        cropBitmap(decodedBitmap, bitmap, sampleSize, region.x(), region.y(),
+                  region.width(), region.height(), rect.x(), rect.y());
+    }
+    return true;
 }
 
 bool SkWEBPImageDecoder::onDecode(SkStream* stream, SkBitmap* decodedBitmap,
@@ -352,16 +402,25 @@
     const int sampleSize = this->getSampleSize();
     SkScaledBitmapSampler sampler(origWidth, origHeight, sampleSize);
 
+    // If only bounds are requested, done
+    if (SkImageDecoder::kDecodeBounds_Mode == mode) {
+        if (!setDecodeConfig(decodedBitmap, sampler.scaledWidth(),
+                             sampler.scaledHeight())) {
+            return false;
+        }
+        return true;
+    }
+#ifdef SK_BUILD_FOR_ANDROID
+    // No Bitmap reuse supported for this format
+    if (!decodedBitmap->isNull()) {
+        return false;
+    }
+#endif
     if (!setDecodeConfig(decodedBitmap, sampler.scaledWidth(),
                          sampler.scaledHeight())) {
         return false;
     }
 
-    // If only bounds are requested, done
-    if (SkImageDecoder::kDecodeBounds_Mode == mode) {
-        return true;
-    }
-
     if (!this->allocPixelRef(decodedBitmap, NULL)) {
         return return_false(*decodedBitmap, "allocPixelRef");
     }
diff --git a/src/images/SkImageDecoder_wbmp.cpp b/src/images/SkImageDecoder_wbmp.cpp
index a7d910f..1ec82d9 100644
--- a/src/images/SkImageDecoder_wbmp.cpp
+++ b/src/images/SkImageDecoder_wbmp.cpp
@@ -110,13 +110,22 @@
     int width = head.fWidth;
     int height = head.fHeight;
     
+    if (SkImageDecoder::kDecodeBounds_Mode == mode) {
+        // assign these directly, in case we return kDimensions_Result
+        decodedBitmap->setConfig(SkBitmap::kIndex8_Config, width, height);
+        decodedBitmap->setIsOpaque(true);
+        return true;
+    }
+#ifdef SK_BUILD_FOR_ANDROID
+    // No Bitmap reuse supported for this format
+    if (!decodedBitmap->isNull()) {
+        return false;
+    }
+#endif
     // assign these directly, in case we return kDimensions_Result
     decodedBitmap->setConfig(SkBitmap::kIndex8_Config, width, height);
     decodedBitmap->setIsOpaque(true);
-    
-    if (SkImageDecoder::kDecodeBounds_Mode == mode)
-        return true;
-    
+
     const SkPMColor colors[] = { SK_ColorBLACK, SK_ColorWHITE };
     SkColorTable* ct = SkNEW_ARGS(SkColorTable, (colors, 2));
     SkAutoUnref   aur(ct);
diff --git a/src/images/SkImageRefPool.cpp b/src/images/SkImageRefPool.cpp
index bfa933e..c24dba0 100644
--- a/src/images/SkImageRefPool.cpp
+++ b/src/images/SkImageRefPool.cpp
@@ -59,7 +59,7 @@
     
     while (NULL != ref && fRAMUsed > limit) {
         // only purge it if its pixels are unlocked
-        if (0 == ref->getLockCount() && ref->fBitmap.getPixels()) {
+        if (!ref->isLocked() && ref->fBitmap.getPixels()) {
             size_t size = ref->ramUsed();
             SkASSERT(size <= fRAMUsed);
             fRAMUsed -= size;
@@ -181,10 +181,10 @@
     SkImageRef* ref = fHead;
     
     while (ref != NULL) {
-        SkDebugf("  [%3d %3d %d] ram=%d data=%d locks=%d %s\n", ref->fBitmap.width(),
+        SkDebugf("  [%3d %3d %d] ram=%d data=%d locked=%d %s\n", ref->fBitmap.width(),
                  ref->fBitmap.height(), ref->fBitmap.config(),
                  ref->ramUsed(), (int)ref->fStream->getLength(),
-                 ref->getLockCount(), ref->getURI());
+                 ref->isLocked(), ref->getURI());
         
         ref = ref->fNext;
     }
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp
index 20a82c8..dd8e406 100644
--- a/src/opts/SkBlitRow_opts_arm.cpp
+++ b/src/opts/SkBlitRow_opts_arm.cpp
@@ -404,6 +404,75 @@
 #define S32A_D565_Opaque_PROC       S32A_D565_Opaque_neon
 #define S32A_D565_Blend_PROC        S32A_D565_Blend_neon
 #define S32_D565_Blend_Dither_PROC  S32_D565_Blend_Dither_neon
+#elif __ARM_ARCH__ >= 7 && !defined(SK_CPU_BENDIAN)
+// ARMv7 src-over blit of 32-bit SkPMColor pixels onto a 565 row; alpha must be 255.
+static void S32A_D565_Opaque_v7(uint16_t* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src,
+                                int count, U8CPU alpha, int /*x*/, int /*y*/) {
+    SkASSERT(255 == alpha);
+    if (count <= 0) return;  // the asm loop tests count only after each pixel; it must start > 0
+    asm volatile (
+                  "1:                                   \n\t" /* per-pixel loop */
+                  "ldr     r3, [%[src]], #4             \n\t" /* r3 = *src++ */
+                  "cmp     r3, #0xff000000              \n\t" /* unsigned compare against alpha == 0xff */
+                  "blo     2f                           \n\t" /* alpha < 255 -> blend or skip */
+                  "and     r4, r3, #0x0000f8            \n\t" /* opaque: top 5 bits of src byte 0 */
+                  "and     r5, r3, #0x00fc00            \n\t" /* top 6 bits of src byte 1 */
+                  "and     r6, r3, #0xf80000            \n\t" /* top 5 bits of src byte 2 */
+                  "pld     [%[src], #32]                \n\t" /* prefetch hint via the bound operand, not a fixed reg */
+                  "lsl     r3, r4, #8                   \n\t" /* byte 0 -> 565 bits 15..11 */
+                  "orr     r3, r3, r5, lsr #5           \n\t" /* byte 1 -> 565 bits 10..5 */
+                  "orr     r3, r3, r6, lsr #19          \n\t" /* byte 2 -> 565 bits 4..0 */
+                  "subs    %[count], %[count], #1       \n\t" /* --count; sets Z for the bne below */
+                  "strh    r3, [%[dst]], #2             \n\t" /* *dst++ = packed pixel (flags untouched) */
+                  "bne     1b                           \n\t" /* more pixels */
+                  "b       4f                           \n\t" /* done */
+                  "2:                                   \n\t" /* alpha < 255 */
+                  "lsrs    r7, r3, #24                  \n\t" /* r7 = src alpha; Z set when it is 0 */
+                  "beq     3f                           \n\t" /* alpha == 0 -> leave dst unchanged */
+                  "ldrh    r4, [%[dst]]                 \n\t" /* r4 = *dst */
+                  "rsb     r7, r7, #255                 \n\t" /* r7 = 255 - alpha (dst scale) */
+                  "and     r6, r4, #0x001f              \n\t" /* r6 = dst bits 4..0 */
+                  "ubfx    r5, r4, #5, #6               \n\t" /* r5 = dst bits 10..5 */
+                  "pld     [%[dst], #16]                \n\t" /* prefetch hint via the bound operand, not a fixed reg */
+                  "lsr     r4, r4, #11                  \n\t" /* r4 = dst bits 15..11 */
+                  "smulbb  r6, r6, r7                   \n\t" /* scale each dst channel by (255 - alpha) */
+                  "smulbb  r5, r5, r7                   \n\t"
+                  "smulbb  r4, r4, r7                   \n\t"
+                  "ubfx    r7, r3, #16, #8              \n\t" /* r7 = src byte 2 */
+                  "ubfx    ip, r3, #8, #8               \n\t" /* ip = src byte 1 */
+                  "and     r3, r3, #0xff                \n\t" /* r3 = src byte 0 */
+                  "add     r6, r6, #16                  \n\t" /* rounding bias: half of 32 / 64 / 32 */
+                  "add     r5, r5, #32                  \n\t"
+                  "add     r4, r4, #16                  \n\t"
+                  "add     r6, r6, r6, lsr #5           \n\t" /* x += x >> 5 (>> 6 for the 6-bit channel) ... */
+                  "add     r5, r5, r5, lsr #6           \n\t"
+                  "add     r4, r4, r4, lsr #5           \n\t"
+                  "add     r6, r7, r6, lsr #5           \n\t" /* ... then x >> 5 (roughly x / 31) + src byte */
+                  "add     r5, ip, r5, lsr #6           \n\t" /* (roughly x / 63 for the 6-bit channel) */
+                  "add     r4, r3, r4, lsr #5           \n\t"
+                  "lsr     r6, r6, #3                   \n\t" /* reduce the sums to 5 / 6 / 5 significant bits */
+                  "and     r5, r5, #0xfc                \n\t"
+                  "and     r4, r4, #0xf8                \n\t"
+                  "orr     r6, r6, r5, lsl #3           \n\t" /* repack into bits 10..5 */
+                  "orr     r4, r6, r4, lsl #8           \n\t" /* repack into bits 15..11 */
+                  "strh    r4, [%[dst]], #2             \n\t" /* *dst++ = blended pixel */
+                  "pld     [%[src], #32]                \n\t" /* prefetch hint for upcoming source pixels */
+                  "subs    %[count], %[count], #1       \n\t"
+                  "bne     1b                           \n\t"
+                  "b       4f                           \n\t"
+                  "3:                                   \n\t" /* alpha == 0 */
+                  "subs    %[count], %[count], #1       \n\t"
+                  "add     %[dst], %[dst], #2           \n\t" /* skip this dst pixel (add leaves flags alone) */
+                  "bne     1b                           \n\t"
+                  "4:                                   \n\t" /* exit */
+                  : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
+                  :
+                  : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "ip"
+                  );
+}
+#define S32A_D565_Opaque_PROC       S32A_D565_Opaque_v7
+#define S32A_D565_Blend_PROC        NULL
+#define S32_D565_Blend_Dither_PROC  NULL
 #else
 #define S32A_D565_Opaque_PROC       NULL
 #define S32A_D565_Blend_PROC        NULL
@@ -418,7 +487,181 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
+#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN) && defined(TEST_SRC_ALPHA)
+
+static void S32A_Opaque_BlitRow32_neon_test_alpha(SkPMColor* SK_RESTRICT dst,
+                                  const SkPMColor* SK_RESTRICT src,
+                                  int count, U8CPU alpha) {
+	SkASSERT(255 == alpha);
+	if (count <= 0)
+	return; /* nothing to blit */
+	/* Src-over blit of count pixels with separate fast paths for runs of alpha 0 and alpha 255. */
+	/* A pixel >= ALPHA_OPAQ has alpha byte 0xFF; a pixel <= ALPHA_TRANS has alpha byte 0 */
+	const unsigned int ALPHA_OPAQ  = 0xFF000000;
+	const unsigned int ALPHA_TRANS = 0x00FFFFFF;
+
+#define UNROLL  4
+	const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1); /* unrolled paths only start while src < src_end */
+	const SkPMColor* SK_RESTRICT src_temp = src;
+
+	/* vtbl indices: replicate byte 3 (alpha) of each of the 2 loaded pixels across that pixel's 4 lanes */
+	uint8x8_t alpha_mask;
+	static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
+	alpha_mask = vld1_u8(alpha_mask_setup);
+
+	uint8x8_t src_raw, dst_raw, dst_final;
+	uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
+	uint8x8_t dst_cooked;
+	uint16x8_t dst_wide;
+	uint8x8_t alpha_narrow;
+	uint16x8_t alpha_wide;
+
+	/* pick the starting state from the first pixel (or go straight to TAIL for short rows) */
+	if( src >= src_end)
+		goto TAIL;
+	if(*src <= ALPHA_TRANS)
+		goto ALPHA_0;
+	if(*src >= ALPHA_OPAQ)
+		goto ALPHA_255;
+	/* fall-thru: first pixel is partially transparent */
+
+ALPHA_1_TO_254:
+	do {
+
+		/* load 4 source pixels as two 2-pixel vectors */
+		src_raw = vreinterpret_u8_u32(vld1_u32(src));
+		src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));
+
+		/* load the matching 4 dst pixels */
+		dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
+		dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
+
+
+		/* broadcast each pixel's alpha byte to all 4 of its lanes */
+		alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
+		/* dst scale follows the a+1 form of SkAlpha255To256() (not a+a>>7) */
+		/* (255-a)+1 is collapsed into a single subtract from 256 */
+		alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
+
+		/* widen the dst bytes to 16 bits */
+		dst_wide = vmovl_u8(dst_raw);
+
+		/* scale dst by (256 - a), then narrow back with >> 8 */
+		dst_wide = vmulq_u16 (dst_wide, alpha_wide);
+		dst_cooked = vshrn_n_u16(dst_wide, 8);
+
+		/* sum -- ignoring any byte lane overflows */
+		dst_final = vadd_u8(src_raw, dst_cooked);
+		/* same steps for the second pair of pixels */
+		alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
+		/* dst scale follows the a+1 form of SkAlpha255To256() (not a+a>>7) */
+		/* (255-a)+1 is collapsed into a single subtract from 256 */
+		alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
+
+		/* widen the dst bytes to 16 bits */
+		dst_wide = vmovl_u8(dst_raw_2);
+
+		/* scale dst by (256 - a), then narrow back with >> 8 */
+		dst_wide = vmulq_u16 (dst_wide, alpha_wide);
+		dst_cooked = vshrn_n_u16(dst_wide, 8);
+
+		/* sum -- ignoring any byte lane overflows */
+		dst_final_2 = vadd_u8(src_raw_2, dst_cooked);
+
+		vst1_u32(dst, vreinterpret_u32_u8(dst_final));
+		vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));
+
+		src += UNROLL;
+		dst += UNROLL;
+
+		/* if the next 2 pixels are both transparent or both opaque,
+		leave this loop so the matching fast path can take over */
+		if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ))
+			break;
+
+	} while(src < src_end);
+
+	if (src >= src_end)
+		goto TAIL;
+
+	if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)
+		goto ALPHA_255;
+
+	/* otherwise the break above fired because both pixels are transparent: fall-thru */
+
+ALPHA_0:
+
+	/* In this state the current pixel is known to have alpha 0, so it is skipped;
+	 we keep advancing for as long as the following pixels are transparent too. */
+	src_temp = src;  // remember the start so dst can be advanced once, after the scan
+	do {
+		if(*(++src) > ALPHA_TRANS)
+			break;
+		if(*(++src) > ALPHA_TRANS)
+			break;
+		if(*(++src) > ALPHA_TRANS)
+			break;
+		if(*(++src) > ALPHA_TRANS)
+			break;
+	} while(src < src_end);
+
+	dst += (src - src_temp);
+
+	/* the transparent run ended (or we ran out of room): determine where to go next. */
+	if( src >= src_end)
+		goto TAIL;
+	if(*src >= ALPHA_OPAQ)
+		goto ALPHA_255;
+	else
+		goto ALPHA_1_TO_254;
+
+ALPHA_255:
+	while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) { /* all 4 alphas are 0xFF: plain copy */
+		dst[0]=src[0];
+		dst[1]=src[1];
+		dst[2]=src[2];
+		dst[3]=src[3];
+		src+=UNROLL;
+		dst+=UNROLL;
+		if(src >= src_end)
+			goto TAIL;
+	}
+
+	// The group of 4 was mixed: copy up to 3 leading opaque pixels one by one.
+	if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
+		if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
+			if(*src >= ALPHA_OPAQ) { *dst++ = *src++; }
+		}
+	}
+
+	if( src >= src_end)
+		goto TAIL;
+	if(*src <= ALPHA_TRANS)
+		goto ALPHA_0;
+	else
+		goto ALPHA_1_TO_254;
+
+TAIL:
+	/* handle the remaining pixels one at a time */
+	src_end += UNROLL + 1;  // undo the bias applied at setup: src_end is now src + count
+	while(src != src_end) {
+		if( *src != 0 ) { /* an all-zero source pixel leaves dst untouched */
+			if( *src >= ALPHA_OPAQ ) {
+				*dst = *src;
+			}
+			else {
+				*dst = SkPMSrcOver(*src, *dst);
+			}
+		}
+		src++;
+		dst++;
+	}
+	return;
+}
+
+#define S32A_Opaque_BlitRow32_PROC  S32A_Opaque_BlitRow32_neon_test_alpha
+
+#elif defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
 
 static void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
                                   const SkPMColor* SK_RESTRICT src,
@@ -544,11 +787,312 @@
 
 #define	S32A_Opaque_BlitRow32_PROC	S32A_Opaque_BlitRow32_neon
 
-#else
+#elif defined (__ARM_ARCH__) /* #if defined(__ARM_HAVE_NEON) && defined... */
 
-#ifdef TEST_SRC_ALPHA
-#error The ARM asm version of S32A_Opaque_BlitRow32 does not support TEST_SRC_ALPHA
-#endif
+#if defined(TEST_SRC_ALPHA)
+
+static void __attribute__((naked)) S32A_Opaque_BlitRow32_arm_test_alpha
+                                        (SkPMColor* SK_RESTRICT dst,
+                                         const SkPMColor* SK_RESTRICT src,
+                                         int count, U8CPU alpha) {
+
+/* Optimizes for alpha == 0, alpha == 255, and 0 < alpha < 255 cases individually */
+/* Predicts that the next pixel will have the same alpha type as the current pixel */
+
+asm volatile (
+
+    "\tSTMDB  r13!, {r4-r12, r14}        \n" /* saving r4-r12, lr on the stack */
+                                             /* we should not save r0-r3 according to ABI */
+
+    "\tCMP    r2, #0                     \n" /* if (count == 0) */
+    "\tBEQ    9f                         \n" /* go to EXIT */
+
+    "\tMOV    r12, #0xff                 \n" /* load the 0xff mask in r12 */
+    "\tORR    r12, r12, r12, LSL #16     \n" /* convert it to 0xff00ff in r12 */
+
+    "\tMOV    r14, #255                  \n" /* r14 = 255 */
+                                             /* will be used later for left-side comparison */
+
+    "\tADD    r2, %[src], r2, LSL #2     \n" /* r2 points to last array element which can be used */
+    "\tSUB    r2, r2, #16                \n" /* as a base for 4-way processing algorithm */
+
+    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer is bigger than */
+    "\tBGT    8f                         \n" /* calculated marker for 4-way -> */
+                                             /* use simple one-by-one processing */
+
+    /* START OF DISPATCHING BLOCK */
+
+    "\t0:                                \n"
+
+    "\tLDM    %[src]!, {r3, r4, r5, r6}  \n" /* 4-way loading of source values to r3-r6 */
+
+    "\tLSR    r7, r3, #24                \n" /* if not all src alphas of 4-way block are equal -> */
+    "\tCMP    r7, r4, LSR #24            \n"
+    "\tCMPEQ  r7, r5, LSR #24            \n"
+    "\tCMPEQ  r7, r6, LSR #24            \n"
+    "\tBNE    1f                         \n" /* -> go to general 4-way processing routine */
+
+    "\tCMP    r14, r7                    \n" /* if all src alphas are equal to 255 */
+    "\tBEQ    3f                         \n" /* go to alpha == 255 optimized routine */
+
+    "\tCMP    r7,  #0                    \n" /* if all src alphas are equal to 0 */
+    "\tBEQ    6f                         \n" /* go to alpha == 0 optimized routine */
+
+    /* END OF DISPATCHING BLOCK */
+
+    /* START OF BLOCK OPTIMIZED FOR 0 < ALPHA < 255 */
+
+    "\t1:                                \n"
+                                             /* we do not have enough registers to make */
+                                             /* 4-way [dst] loading -> we are using 2 * 2-way */
+
+    "\tLDM    %[dst], {r7, r8}           \n" /* 1st 2-way loading of dst values to r7-r8 */
+
+    /* PROCESSING BLOCK 1 */
+    /* r3 = src, r7 = dst */
+
+    "\tLSR    r11, r3,  #24              \n" /* extracting alpha from source and storing to r11 */
+    "\tAND    r9,  r12, r7               \n" /* r9 = br masked by r12 (0xff00ff) */
+    "\tRSB    r11, r11, #256             \n" /* subtracting the alpha from 255 -> r11 = scale */
+    "\tAND    r10, r12, r7, LSR #8       \n" /* r10 = ag masked by r12 (0xff00ff) */
+    "\tMUL    r9,  r9,  r11              \n" /* br = br * scale */
+    "\tAND    r9,  r12, r9, LSR #8       \n" /* lsr br by 8 and mask it */
+    "\tMUL    r10, r10, r11              \n" /* ag = ag * scale */
+    "\tAND    r10, r10, r12, LSL #8      \n" /* mask ag with reverse mask */
+    "\tORR    r7,  r9,  r10              \n" /* br | ag */
+    "\tADD    r7,  r3,  r7               \n" /* dst = src + calc dest(r8) */
+
+    /* PROCESSING BLOCK 2 */
+    /* r4 = src, r8 = dst */
+
+    "\tLSR    r11, r4,  #24              \n" /* see PROCESSING BLOCK 1 */
+    "\tAND    r9,  r12, r8               \n"
+    "\tRSB    r11, r11, #256             \n"
+    "\tAND    r10, r12, r8, LSR #8       \n"
+    "\tMUL    r9,  r9,  r11              \n"
+    "\tAND    r9,  r12, r9, LSR #8       \n"
+    "\tMUL    r10, r10, r11              \n"
+    "\tAND    r10, r10, r12, LSL #8      \n"
+    "\tORR    r8,  r9,  r10              \n"
+    "\tADD    r8,  r4,  r8               \n"
+
+    "\tSTM    %[dst]!, {r7, r8}          \n" /* 1st 2-way storing of processed dst values */
+
+    "\tLDM    %[dst], {r9, r10}          \n" /* 2nd 2-way loading of dst values to r9-r10 */
+
+    /* PROCESSING BLOCK 3 */
+    /* r5 = src, r9 = dst */
+
+    "\tLSR    r11, r5,  #24              \n" /* see PROCESSING BLOCK 1 */
+    "\tAND    r7,  r12, r9               \n"
+    "\tRSB    r11, r11, #256             \n"
+    "\tAND    r8,  r12, r9, LSR #8       \n"
+    "\tMUL    r7,  r7,  r11              \n"
+    "\tAND    r7,  r12, r7, LSR #8       \n"
+    "\tMUL    r8,  r8,  r11              \n"
+    "\tAND    r8,  r8,  r12, LSL #8      \n"
+    "\tORR    r9,  r7,  r8               \n"
+    "\tADD    r9,  r5,  r9               \n"
+
+    /* PROCESSING BLOCK 4 */
+    /* r6 = src, r10 = dst */
+
+    "\tLSR    r11, r6,  #24              \n" /* see PROCESSING BLOCK 1 */
+    "\tAND    r7,  r12, r10              \n"
+    "\tRSB    r11, r11, #256             \n"
+    "\tAND    r8,  r12, r10, LSR #8      \n"
+    "\tMUL    r7,  r7,  r11              \n"
+    "\tAND    r7,  r12, r7, LSR #8       \n"
+    "\tMUL    r8,  r8,  r11              \n"
+    "\tAND    r8,  r8,  r12, LSL #8      \n"
+    "\tORR    r10, r7,  r8               \n"
+    "\tADD    r10, r6,  r10              \n"
+
+    "\tSTM    %[dst]!, {r9, r10}         \n" /* 2nd 2-way storing of processed dst values */
+
+    "\tCMP    %[src], r2                 \n" /* if our current [src] pointer <= calculated marker */
+    "\tBLE    0b                         \n" /* we could run 4-way processing -> go to dispatcher */
+    "\tBGT    8f                         \n" /* else -> use simple one-by-one processing */
+
+    /* END OF BLOCK OPTIMIZED FOR 0 < ALPHA < 255 */
+
+    /* START OF BLOCK OPTIMIZED FOR ALPHA == 255 */
+
+    "\t2:                                \n" /* ENTRY 1: LOADING [src] to registers */
+
+    "\tLDM    %[src]!, {r3, r4, r5, r6}  \n" /* 4-way loading of source values to r3-r6 */
+
+    "\tAND    r7, r3, r4                 \n" /* if not all alphas == 255 -> */
+    "\tAND    r8, r5, r6                 \n"
+    "\tAND    r9, r7, r8                 \n"
+    "\tCMP    r14, r9, LSR #24           \n"
+    "\tBNE    4f                         \n" /* -> go to alpha == 0 check */
+
+    "\t3:                                \n" /* ENTRY 2: [src] already loaded by DISPATCHER */
+
+    "\tSTM    %[dst]!, {r3, r4, r5, r6}  \n" /* all alphas == 255 -> 4-way copy [src] to [dst] */
+
+    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
+    "\tBLE    2b                         \n" /* we could run 4-way processing */
+                                             /* because now we're in ALPHA == 255 state */
+                                             /* run next cycle with priority alpha == 255 checks */
+
+    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
+                                             /* use simple one-by-one processing */
+
+    "\t4:                                \n"
+
+    "\tORR    r7, r3, r4                 \n" /* if not all alphas == 0 -> */
+    "\tORR    r8, r5, r6                 \n"
+    "\tORR    r9, r7, r8                 \n"
+    "\tLSRS   r9, #24                    \n"
+    "\tBNE    1b                         \n" /* -> go to general processing mode */
+                                             /* (we already checked for alpha == 255) */
+
+    "\tADD    %[dst], %[dst], #16        \n" /* all src alphas == 0 -> do not change dst values */
+
+    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
+    "\tBLE    5f                         \n" /* we could run 4-way processing one more time */
+                                             /* because now we're in ALPHA == 0 state */
+                                             /* run next cycle with priority alpha == 0 checks */
+
+    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
+                                             /* use simple one-by-one processing */
+
+    /* END OF BLOCK OPTIMIZED FOR ALPHA == 255 */
+
+    /* START OF BLOCK OPTIMIZED FOR ALPHA == 0 */
+
+    "\t5:                                \n" /* ENTRY 1: LOADING [src] to registers */
+
+    "\tLDM    %[src]!, {r3, r4, r5, r6}  \n" /* 4-way loading of source values to r3-r6 */
+
+    "\tORR    r7, r3, r4                 \n" /* if not all alphas == 0 -> */
+    "\tORR    r8, r5, r6                 \n"
+    "\tORR    r9, r7, r8                 \n"
+    "\tLSRS   r9, #24                    \n"
+    "\tBNE    7f                         \n" /* -> go to alpha == 255 check */
+
+    "\t6:                                \n" /* ENTRY 2: [src] already loaded by DISPATCHER */
+
+    "\tADD    %[dst], %[dst], #16        \n" /* all src alphas == 0 -> do not change dst values */
+
+    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
+    "\tBLE    5b                         \n" /* we could run 4-way processing one more time */
+                                             /* because now we're in ALPHA == 0 state */
+                                             /* run next cycle with priority alpha == 0 checks */
+
+    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
+                                             /* use simple one-by-one processing */
+    "\t7:                                \n"
+
+    "\tAND    r7, r3, r4                 \n" /* if not all alphas == 255 -> */
+    "\tAND    r8, r5, r6                 \n"
+    "\tAND    r9, r7, r8                 \n"
+    "\tCMP    r14, r9, LSR #24           \n"
+    "\tBNE    1b                         \n" /* -> go to general processing mode */
+                                             /* (we already checked for alpha == 0) */
+
+    "\tSTM    %[dst]!, {r3, r4, r5, r6}  \n" /* all alphas == 255 -> 4-way copy [src] to [dst] */
+
+    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
+    "\tBLE    2b                         \n" /* we could run 4-way processing one more time */
+                                             /* because now we're in ALPHA == 255 state */
+                                             /* run next cycle with priority alpha == 255 checks */
+
+    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
+                                             /* use simple one-by-one processing */
+
+    /* END OF BLOCK OPTIMIZED FOR ALPHA == 0 */
+
+    /* START OF TAIL BLOCK */
+    /* (one-by-one processing of the up to three pixels left once [src] has passed the 4-way marker) */
+
+    "\t8:                                \n"
+
+    "\tADD    r2, r2, #16                \n" /* now r2 points to the element just after array */
+                                             /* we've done r2 = r2 - 16 at procedure start */
+
+    "\tCMP    %[src], r2                 \n" /* if [src] has reached that end pointer nothing is left -> */
+    "\tBEQ    9f                         \n" /* -> goto EXIT */
+
+    /* TAIL PROCESSING BLOCK 1 */
+
+    "\tLDR    r3, [%[src]], #4           \n" /* r3 = *src, src++ */
+    "\tLDR    r7, [%[dst]]               \n" /* r7 = *dst */
+
+    "\tLSR    r11, r3,  #24              \n" /* extracting alpha from source */
+    "\tAND    r9,  r12, r7               \n" /* r9 = br masked by r12 (0xff00ff) */
+    "\tRSB    r11, r11, #256             \n" /* r11 = 256 - alpha = scale */
+    "\tAND    r10, r12, r7, LSR #8       \n" /* r10 = ag masked by r12 (0xff00ff) */
+    "\tMUL    r9,  r9,  r11              \n" /* br = br * scale */
+    "\tAND    r9,  r12, r9, LSR #8       \n" /* lsr br by 8 and mask it */
+    "\tMUL    r10, r10, r11              \n" /* ag = ag * scale */
+    "\tAND    r10, r10, r12, LSL #8      \n" /* keep the ag bytes: mask with r12 << 8 */
+    "\tORR    r7,  r9,  r10              \n" /* br | ag */
+    "\tADD    r7,  r3,  r7               \n" /* r7 = src (r3) + scaled dst (r7) */
+
+    "\tSTR    r7, [%[dst]], #4           \n" /* *dst = r7; dst++ */
+
+    "\tCMP    %[src], r2                 \n" /* if [src] has reached the end pointer -> */
+    "\tBEQ    9f                         \n" /* -> goto EXIT */
+
+    /* TAIL PROCESSING BLOCK 2 */
+
+    "\tLDR    r3, [%[src]], #4           \n" /* see TAIL PROCESSING BLOCK 1 */
+    "\tLDR    r7, [%[dst]]               \n"
+
+    "\tLSR    r11, r3,  #24              \n"
+    "\tAND    r9,  r12, r7               \n"
+    "\tRSB    r11, r11, #256             \n"
+    "\tAND    r10, r12, r7, LSR #8       \n"
+    "\tMUL    r9,  r9,  r11              \n"
+    "\tAND    r9,  r12, r9, LSR #8       \n"
+    "\tMUL    r10, r10, r11              \n"
+    "\tAND    r10, r10, r12, LSL #8      \n"
+    "\tORR    r7,  r9,  r10              \n"
+    "\tADD    r7,  r3,  r7               \n"
+
+    "\tSTR    r7, [%[dst]], #4           \n"
+
+    "\tCMP    %[src], r2                 \n"
+    "\tBEQ    9f                         \n"
+
+    /* TAIL PROCESSING BLOCK 3 */
+
+    "\tLDR    r3, [%[src]], #4           \n" /* see TAIL PROCESSING BLOCK 1 */
+    "\tLDR    r7, [%[dst]]               \n"
+
+    "\tLSR    r11, r3,  #24              \n"
+    "\tAND    r9,  r12, r7               \n"
+    "\tRSB    r11, r11, #256             \n"
+    "\tAND    r10, r12, r7, LSR #8       \n"
+    "\tMUL    r9,  r9,  r11              \n"
+    "\tAND    r9,  r12, r9, LSR #8       \n"
+    "\tMUL    r10, r10, r11              \n"
+    "\tAND    r10, r10, r12, LSL #8      \n"
+    "\tORR    r7,  r9,  r10              \n"
+    "\tADD    r7,  r3,  r7               \n"
+
+    "\tSTR    r7, [%[dst]], #4           \n"
+
+    /* END OF TAIL BLOCK */
+
+    "\t9:                                \n" /* EXIT */
+
+    "\tLDMIA  r13!, {r4-r12, r14}        \n" /* restoring r4-r12, lr from stack */
+    "\tBX     lr                         \n" /* return */
+
+    : [dst] "+r" (dst), [src] "+r" (src)
+    :
+    : "cc", "r2", "r3", "memory"
+
+    );
+
+}
+
+#define	S32A_Opaque_BlitRow32_PROC S32A_Opaque_BlitRow32_arm_test_alpha
+#else /* !defined(TEST_SRC_ALPHA) */
 
 static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
                                   const SkPMColor* SK_RESTRICT src,
@@ -642,6 +1186,9 @@
                   );
 }
 #define	S32A_Opaque_BlitRow32_PROC	S32A_Opaque_BlitRow32_arm
+#endif /* !defined(TEST_SRC_ALPHA) */
+#else /* ... #elif defined (__ARM_ARCH__) */
+#define	S32A_Opaque_BlitRow32_PROC	NULL
 #endif
 
 /*
diff --git a/src/ports/FontHostConfiguration_android.cpp b/src/ports/FontHostConfiguration_android.cpp
index 475dc4a..d1164c8 100644
--- a/src/ports/FontHostConfiguration_android.cpp
+++ b/src/ports/FontHostConfiguration_android.cpp
@@ -16,8 +16,12 @@
 */
 
 #include "FontHostConfiguration_android.h"
-#include <expat.h>
+#include "SkString.h"
 #include "SkTDArray.h"
+#include <expat.h>
+#if !defined(SK_BUILD_FOR_ANDROID_NDK)
+    #include <cutils/properties.h>
+#endif
 
 #define SYSTEM_FONTS_FILE "/system/etc/system_fonts.xml"
 #define FALLBACK_FONTS_FILE "/system/etc/fallback_fonts.xml"
@@ -127,6 +131,65 @@
     }
 }
 
+#if !defined(SK_BUILD_FOR_ANDROID_NDK)
+/**
+ * Append the first two characters of the language and country system properties
+ * to language and region, which must already hold NUL-terminated strings.
+ */
+void getLocale(char* language, char* region)
+{
+    char lang[PROPERTY_VALUE_MAX];
+    char country[PROPERTY_VALUE_MAX];
+    property_get("persist.sys.language", lang, "");
+    property_get("persist.sys.country", country, "");
+    if (lang[0] == '\0' && country[0] == '\0') {  // nothing persisted: use ro.* (en/US)
+        property_get("ro.product.locale.language", lang, "en");
+        property_get("ro.product.locale.region", country, "US");
+    }
+    strncat(language, lang, 2);
+    strncat(region, country, 2);
+}
+#endif
+
+/**
+ * Open, for reading, the most locale-specific variant of origname that exists.
+ * Unless built for the NDK, the names tried for language "ja", region "JP" and
+ * origname /system/etc/fallback_fonts.xml are, in order:
+ *      /system/etc/fallback_fonts-ja-JP.xml
+ *      /system/etc/fallback_fonts-ja.xml
+ *      /system/etc/fallback_fonts.xml
+ * Returns the first stream that opens, or the result of the final fopen.
+ */
+FILE* openLocalizedFile(const char* origname) {
+#if !defined(SK_BUILD_FOR_ANDROID_NDK)
+    char lang[3] = "";
+    char country[3] = "";
+    getLocale(lang, country);
+
+    // Strip a trailing ".xml" so the locale suffix lands before the extension.
+    SkString stem;
+    stem.set(origname);
+    if (stem.endsWith(".xml")) {
+        stem.resize(stem.size()-4);
+    }
+
+    // Most specific candidate first: language and region
+    SkString candidate;
+    candidate.printf("%s-%s-%s.xml", stem.c_str(), lang, country);
+    if (FILE* localized = fopen(candidate.c_str(), "r")) {
+        return localized;
+    }
+    // Then language only
+    candidate.printf("%s-%s.xml", stem.c_str(), lang);
+    if (FILE* localized = fopen(candidate.c_str(), "r")) {
+        return localized;
+    }
+#endif
+
+    // No localized variant opened (or NDK build): use the name as given
+    return fopen(origname, "r");
+}
+
 /**
  * This function parses the given filename and stores the results in the given
  * families array.
@@ -136,7 +199,7 @@
     FamilyData *familyData = new FamilyData(&parser, families);
     XML_SetUserData(parser, familyData);
     XML_SetElementHandler(parser, startElementHandler, endElementHandler);
-    FILE *file = fopen(filename, "r");
+    FILE *file = openLocalizedFile(filename);
     // Some of the files we attempt to parse (in particular, /vendor/etc/fallback_fonts.xml)
     // are optional - failure here is okay because one of these optional files may not exist.
     if (file == NULL) {
@@ -154,15 +217,12 @@
     }
 }
 
-/**
- * Loads data on font families from various expected configuration files. The
- * resulting data is returned in the given fontFamilies array.
- */
-void getFontFamilies(SkTDArray<FontFamily*> &fontFamilies) {
-
-    SkTDArray<FontFamily*> fallbackFonts;
-    SkTDArray<FontFamily*> vendorFonts;
+void getSystemFontFamilies(SkTDArray<FontFamily*> &fontFamilies) {
     parseConfigFile(SYSTEM_FONTS_FILE, fontFamilies);
+}
+
+void getFallbackFontFamilies(SkTDArray<FontFamily*> &fallbackFonts) {
+    SkTDArray<FontFamily*> vendorFonts;
     parseConfigFile(FALLBACK_FONTS_FILE, fallbackFonts);
     parseConfigFile(VENDOR_FONTS_FILE, vendorFonts);
 
@@ -188,6 +248,18 @@
             currentOrder = order + 1;
         }
     }
+}
+
+/**
+ * Loads data on font families from various expected configuration files. The
+ * resulting data is returned in the given fontFamilies array.
+ */
+void getFontFamilies(SkTDArray<FontFamily*> &fontFamilies) {
+    SkTDArray<FontFamily*> fallbackFonts;
+
+    getSystemFontFamilies(fontFamilies);
+    getFallbackFontFamilies(fallbackFonts);
+
     // Append all fallback fonts to system fonts
     for (int i = 0; i < fallbackFonts.count(); ++i) {
         *fontFamilies.append() = fallbackFonts[i];
diff --git a/src/ports/FontHostConfiguration_android.h b/src/ports/FontHostConfiguration_android.h
index 010f0ef..2441f0e 100644
--- a/src/ports/FontHostConfiguration_android.h
+++ b/src/ports/FontHostConfiguration_android.h
@@ -39,4 +39,21 @@
  */
 void getFontFamilies(SkTDArray<FontFamily*> &fontFamilies);
 
+/**
+ * Parse only the core system font configuration file and return the results in
+ * an array of FontFamily structures.
+ */
+void getSystemFontFamilies(SkTDArray<FontFamily*> &fontFamilies);
+
+
+/**
+ * Parse the fallback and vendor system font configuration files and return the
+ * results in an array of FontFamily structures.
+ */
+void getFallbackFontFamilies(SkTDArray<FontFamily*> &fallbackFonts);
+
+#if !defined(SK_BUILD_FOR_ANDROID_NDK)
+    void getLocale(char* language, char* region);  // appends <= 2 chars to each; both must be NUL-terminated on entry
+#endif
+
 #endif /* FONTHOSTCONFIGURATION_ANDROID_H_ */
diff --git a/src/ports/SkFontHost_FreeType.cpp b/src/ports/SkFontHost_FreeType.cpp
index da1040d..621c94a 100644
--- a/src/ports/SkFontHost_FreeType.cpp
+++ b/src/ports/SkFontHost_FreeType.cpp
@@ -105,6 +105,11 @@
 // This value was chosen by eyeballing the result in Firefox and trying to match it.
 static const FT_Pos kBitmapEmboldenStrength = 1 << 6;
 
+// convert from Skia's fixed (16.16) to FreeType's fixed (26.6) representation
+static inline int FixedToDot6(SkFixed x) { return x >> 10; }  // 16 -> 6 fractional bits
+// convert from FreeType's fixed (26.6) to Skia's fixed (16.16) representation
+static inline SkFixed Dot6ToFixed(int x) { return x << 10; }  // 6 -> 16 fractional bits
+
 static bool
 InitFreetype() {
     FT_Error err = FT_Init_FreeType(&gFTLibrary);
@@ -162,6 +167,9 @@
 
     FT_Error setupSize();
     void emboldenOutline(FT_Outline* outline);
+    void getBBoxForCurrentGlyph(SkGlyph* glyph, FT_BBox* bbox,
+                                bool snapToPixelBoundary = false);
+    void updateGlyphIfLCD(SkGlyph* glyph);
 };
 
 ///////////////////////////////////////////////////////////////////////////
@@ -954,6 +962,38 @@
     return;
 }
 
+// Fetch the control box of fFace's current glyph outline into *bbox, shift it by the
+// glyph's subpixel offset if enabled, and optionally outset it to multiples of 64.
+void SkScalerContext_FreeType::getBBoxForCurrentGlyph(SkGlyph* glyph,
+                                                      FT_BBox* bbox,
+                                                      bool snapToPixelBoundary) {
+    FT_Outline_Get_CBox(&fFace->glyph->outline, bbox);
+
+    if (fRec.fFlags & SkScalerContext::kSubpixelPositioning_Flag) {
+        const int subX = FixedToDot6(glyph->getSubXFixed());
+        const int subY = FixedToDot6(glyph->getSubYFixed());
+        bbox->xMin += subX;
+        bbox->xMax += subX;
+        // y is subtracted: freetype's y axis goes up, skia's goes down
+        bbox->yMin -= subY;
+        bbox->yMax -= subY;
+    }
+
+    if (!snapToPixelBoundary) { return; }
+    // round the mins down and the maxes up to integral boundaries
+    bbox->xMin &= ~63;
+    bbox->yMin &= ~63;
+    bbox->xMax = (bbox->xMax + 63) & ~63;
+    bbox->yMax = (bbox->yMax + 63) & ~63;
+}
+
+// When isLCD(fRec), widen the glyph by gLCDExtra and move its left edge out by half of it.
+void SkScalerContext_FreeType::updateGlyphIfLCD(SkGlyph* glyph) {
+    if (!isLCD(fRec)) { return; }
+    glyph->fLeft -= gLCDExtra >> 1;
+    glyph->fWidth += gLCDExtra;
+}
+
 void SkScalerContext_FreeType::generateMetrics(SkGlyph* glyph) {
     SkAutoMutexAcquire  ac(gFTMutex);
 
@@ -975,6 +1015,8 @@
         return;
     }
 
+    SkFixed vLeft = 0, vTop = 0;  // zeroed: an early break in the OUTLINE case skips their assignment
+
     switch ( fFace->glyph->format ) {
       case FT_GLYPH_FORMAT_OUTLINE: {
         FT_BBox bbox;
@@ -987,40 +1029,29 @@
             break;
         }
 
-        if (fRec.fFlags & kEmbolden_Flag) {
+        if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
             emboldenOutline(&fFace->glyph->outline);
         }
-        FT_Outline_Get_CBox(&fFace->glyph->outline, &bbox);
 
-        if (fRec.fFlags & SkScalerContext::kSubpixelPositioning_Flag) {
-            int dx = glyph->getSubXFixed() >> 10;
-            int dy = glyph->getSubYFixed() >> 10;
-            // negate dy since freetype-y-goes-up and skia-y-goes-down
-            bbox.xMin += dx;
-            bbox.yMin -= dy;
-            bbox.xMax += dx;
-            bbox.yMax -= dy;
-        }
-
-        bbox.xMin &= ~63;
-        bbox.yMin &= ~63;
-        bbox.xMax  = (bbox.xMax + 63) & ~63;
-        bbox.yMax  = (bbox.yMax + 63) & ~63;
+        getBBoxForCurrentGlyph(glyph, &bbox, true);
 
         glyph->fWidth   = SkToU16((bbox.xMax - bbox.xMin) >> 6);
         glyph->fHeight  = SkToU16((bbox.yMax - bbox.yMin) >> 6);
         glyph->fTop     = -SkToS16(bbox.yMax >> 6);
         glyph->fLeft    = SkToS16(bbox.xMin >> 6);
 
-        if (isLCD(fRec)) {
-            glyph->fWidth += gLCDExtra;
-            glyph->fLeft -= gLCDExtra >> 1;
+        if ((fRec.fFlags & SkScalerContext::kVertical_Flag)) {
+            vLeft = Dot6ToFixed(bbox.xMin);
+            vTop = Dot6ToFixed(bbox.yMax);
         }
+
+        updateGlyphIfLCD(glyph);
+
         break;
       }
 
       case FT_GLYPH_FORMAT_BITMAP:
-        if (fRec.fFlags & kEmbolden_Flag) {
+        if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
             FT_GlyphSlot_Own_Bitmap(fFace->glyph);
             FT_Bitmap_Embolden(gFTLibrary, &fFace->glyph->bitmap, kBitmapEmboldenStrength, 0);
         }
@@ -1047,6 +1078,62 @@
         glyph->fAdvanceY = -SkFixedMul(fMatrix22.yx, fFace->glyph->linearHoriAdvance);
     }
 
+    if ((fRec.fFlags & SkScalerContext::kVertical_Flag)
+            && fFace->glyph->format == FT_GLYPH_FORMAT_OUTLINE) {
+
+        //TODO: do we need to specially handle SubpixelPositioning and Kerning?
+
+        FT_Matrix identityMatrix;
+        identityMatrix.xx = identityMatrix.yy = SK_Fixed1;
+        identityMatrix.xy = identityMatrix.yx = 0;
+
+        // if the matrix is not the identity matrix then we need to re-load the
+        // glyph with the identity matrix to get the necessary bounding box
+        if (memcmp(&fMatrix22, &identityMatrix, sizeof(FT_Matrix)) != 0) {
+
+            FT_Set_Transform(fFace, &identityMatrix, NULL);
+
+            err = FT_Load_Glyph( fFace, glyph->getGlyphID(fBaseGlyphCount), fLoadGlyphFlags );
+            if (err != 0) {
+                SkDEBUGF(("SkScalerContext_FreeType::generateMetrics(%x): FT_Load_Glyph(glyph:%d flags:%d) returned 0x%x\n",
+                            fFaceRec->fFontID, glyph->getGlyphID(fBaseGlyphCount), fLoadGlyphFlags, err));
+                goto ERROR;
+            }
+
+            if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
+                emboldenOutline(&fFace->glyph->outline);
+            }
+        }
+
+        // bounding box of the unskewed and unscaled glyph
+        FT_BBox bbox;
+        getBBoxForCurrentGlyph(glyph, &bbox);
+
+        // compute the vertical gap above and below the glyph if the glyph were
+        // centered within the linearVertAdvance
+        SkFixed vGap = (fFace->glyph->linearVertAdvance - Dot6ToFixed(bbox.yMax - bbox.yMin)) / 2;
+
+        // the origin point of the glyph when rendered vertically
+        FT_Vector vOrigin;
+        vOrigin.x = fFace->glyph->linearHoriAdvance / 2;
+        vOrigin.y = vGap + Dot6ToFixed(bbox.yMax);
+
+        // transform the vertical origin based on the matrix of the actual glyph
+        FT_Vector_Transform(&vOrigin, &fMatrix22);
+
+        // compute a new offset vector for the glyph by subtracting the vertical
+        // origin from the original horizontal offset vector
+        glyph->fLeft = SkFixedRoundToInt(vLeft - vOrigin.x);
+        glyph->fTop =  -SkFixedRoundToInt(vTop - vOrigin.y);
+
+        updateGlyphIfLCD(glyph);
+
+        // use the vertical advance values computed by freetype
+        glyph->fAdvanceX = -SkFixedMul(fMatrix22.xy, fFace->glyph->linearVertAdvance);
+        glyph->fAdvanceY = SkFixedMul(fMatrix22.yy, fFace->glyph->linearVertAdvance);
+    }
+
+
 #ifdef ENABLE_GLYPH_SPEW
     SkDEBUGF(("FT_Set_Char_Size(this:%p sx:%x sy:%x ", this, fScaleX, fScaleY));
     SkDEBUGF(("Metrics(glyph:%d flags:0x%x) w:%d\n", glyph->getGlyphID(fBaseGlyphCount), fLoadGlyphFlags, glyph->fWidth));
@@ -1243,7 +1330,7 @@
             FT_BBox     bbox;
             FT_Bitmap   target;
 
-            if (fRec.fFlags & kEmbolden_Flag) {
+            if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
                 emboldenOutline(outline);
             }
 
@@ -1286,7 +1373,7 @@
         } break;
 
         case FT_GLYPH_FORMAT_BITMAP: {
-            if (fRec.fFlags & kEmbolden_Flag) {
+            if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
                 FT_GlyphSlot_Own_Bitmap(fFace->glyph);
                 FT_Bitmap_Embolden(gFTLibrary, &fFace->glyph->bitmap, kBitmapEmboldenStrength, 0);
             }
@@ -1429,7 +1516,7 @@
         return;
     }
 
-    if (fRec.fFlags & kEmbolden_Flag) {
+    if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
         emboldenOutline(&fFace->glyph->outline);
     }
 
@@ -1511,7 +1598,7 @@
         if (x_glyph) {
             FT_BBox bbox;
             FT_Load_Glyph(fFace, x_glyph, fLoadGlyphFlags);
-            if (fRec.fFlags & kEmbolden_Flag) {
+            if ((fRec.fFlags & kEmbolden_Flag) && !(fFace->style_flags & FT_STYLE_FLAG_BOLD)) {
                 emboldenOutline(&fFace->glyph->outline);
             }
             FT_Outline_Get_CBox(&fFace->glyph->outline, &bbox);
diff --git a/src/ports/SkFontHost_android.cpp b/src/ports/SkFontHost_android.cpp
index 1856cff..2c58079 100644
--- a/src/ports/SkFontHost_android.cpp
+++ b/src/ports/SkFontHost_android.cpp
@@ -16,6 +16,7 @@
 */
 
 #include "SkFontHost.h"
+#include "SkGraphics.h"
 #include "SkDescriptor.h"
 #include "SkMMapStream.h"
 #include "SkPaint.h"
@@ -25,22 +26,56 @@
 #include "SkTSearch.h"
 #include "FontHostConfiguration_android.h"
 #include <stdio.h>
+#include <string.h>
 
-#define FONT_CACHE_MEMORY_BUDGET    (768 * 1024)
+//#define SkDEBUGF(args       )       SkDebugf args
 
 #ifndef SK_FONT_FILE_PREFIX
     #define SK_FONT_FILE_PREFIX          "/fonts/"
 #endif
 
+// Defined in SkFontHost_FreeType.cpp
 bool find_name_and_attributes(SkStream* stream, SkString* name,
                               SkTypeface::Style* style, bool* isFixedWidth);
 
-static void GetFullPathForSysFonts(SkString* full, const char name[]) {
+static void getFullPathForSysFonts(SkString* full, const char name[]) {
     full->set(getenv("ANDROID_ROOT"));
     full->append(SK_FONT_FILE_PREFIX);
     full->append(name);
 }
 
+// Build the full system-font path for path (ANDROID_ROOT + SK_FONT_FILE_PREFIX) and hand
+// it to find_name_and_attributes, trying an mmap stream first and a FILE stream second.
+// Returns false (logging only when isExpected) if neither stream reports any data.
+static bool getNameAndStyle(const char path[], SkString* name,
+                            SkTypeface::Style* style,
+                            bool* isFixedWidth, bool isExpected) {
+    SkString fontPath;
+    getFullPathForSysFonts(&fontPath, path);
+
+    SkMMAPStream mapped(fontPath.c_str());
+    if (mapped.getLength() > 0) {
+        return find_name_and_attributes(&mapped, name, style, isFixedWidth);
+    }
+    SkFILEStream buffered(fontPath.c_str());
+    if (buffered.getLength() > 0) {
+        return find_name_and_attributes(&buffered, name, style, isFixedWidth);
+    }
+    if (isExpected) {
+        SkDebugf("---- failed to open <%s> as a font\n", fontPath.c_str());
+    }
+    return false;
+}
+
+static SkTypeface* deserializeLocked(SkStream* stream);
+static SkTypeface* createTypefaceLocked(const SkTypeface* familyFace,
+        const char familyName[], const void* data, size_t bytelength,
+        SkTypeface::Style style);
+static SkStream* openStreamLocked(uint32_t fontID);
+static size_t getFileNameLocked(SkFontID fontID, char path[], size_t length, int32_t* index);
+static SkFontID nextLogicalFontLocked(SkFontID currFontID, SkFontID origFontID);
+static SkTypeface* createTypefaceFromStreamLocked(SkStream* stream);
+
 ///////////////////////////////////////////////////////////////////////////////
 
 struct FamilyRec;
@@ -56,7 +91,7 @@
 
     void construct(const char name[], FamilyRec* family) {
         fName = strdup(name);
-        fFamily = family;   // we don't own this, so just record the referene
+        fFamily = family;   // we don't own this, so just record the reference
     }
 
     void destruct() {
@@ -69,36 +104,23 @@
 // we use atomic_inc to grow this for each typeface we create
 static int32_t gUniqueFontID;
 
-// this is the mutex that protects gFamilyHead and GetNameList()
+// this is the mutex that protects all of the global data structures in this module
+// functions with the Locked() suffix must be called while holding this mutex
 SK_DECLARE_STATIC_MUTEX(gFamilyHeadAndNameListMutex);
-static FamilyRec* gFamilyHead;
-
-static NameFamilyPairList& GetNameList() {
-    /*
-     *  It is assumed that the caller has already acquired a lock on
-     *  gFamilyHeadAndNameListMutex before calling this.
-     */
-    static NameFamilyPairList* gNameList;
-    if (NULL == gNameList) {
-        gNameList = SkNEW(NameFamilyPairList);
-        // register a delete proc with sk_atexit(..) when available
-    }
-    return *gNameList;
-}
+static FamilyRec* gFamilyHead = NULL;
+static SkTDArray<NameFamilyPair> gFallbackFilenameList;
+static NameFamilyPairList gNameList;
 
 struct FamilyRec {
     FamilyRec*  fNext;
     SkTypeface* fFaces[4];
 
-    FamilyRec()
-    {
-        fNext = gFamilyHead;
+    FamilyRec() : fNext(NULL) {
         memset(fFaces, 0, sizeof(fFaces));
-        gFamilyHead = this;
     }
 };
 
-static SkTypeface* find_best_face(const FamilyRec* family,
+static SkTypeface* findBestFaceLocked(const FamilyRec* family,
                                   SkTypeface::Style style) {
     SkTypeface* const* faces = family->fFaces;
 
@@ -125,7 +147,7 @@
     return NULL;
 }
 
-static FamilyRec* find_family(const SkTypeface* member) {
+static FamilyRec* findFamilyLocked(const SkTypeface* member) {
     FamilyRec* curr = gFamilyHead;
     while (curr != NULL) {
         for (int i = 0; i < 4; i++) {
@@ -141,7 +163,7 @@
 /*  Returns the matching typeface, or NULL. If a typeface is found, its refcnt
     is not modified.
  */
-static SkTypeface* find_from_uniqueID(uint32_t uniqueID) {
+static SkTypeface* findFromUniqueIDLocked(uint32_t uniqueID) {
     FamilyRec* curr = gFamilyHead;
     while (curr != NULL) {
         for (int i = 0; i < 4; i++) {
@@ -158,8 +180,8 @@
 /*  Remove reference to this face from its family. If the resulting family
     is empty (has no faces), return that family, otherwise return NULL
 */
-static FamilyRec* remove_from_family(const SkTypeface* face) {
-    FamilyRec* family = find_family(face);
+static FamilyRec* removeFromFamilyLocked(const SkTypeface* face) {
+    FamilyRec* family = findFamilyLocked(face);
     if (family) {
         SkASSERT(family->fFaces[face->style()] == face);
         family->fFaces[face->style()] = NULL;
@@ -170,13 +192,13 @@
             }
         }
     } else {
-//        SkDebugf("remove_from_family(%p) face not found", face);
+//        SkDebugf("removeFromFamilyLocked(%p) face not found", face);
     }
     return family;  // return the empty family
 }
 
 // maybe we should make FamilyRec be doubly-linked
-static void detach_and_delete_family(FamilyRec* family) {
+static void detachAndDeleteFamilyLocked(FamilyRec* family) {
     FamilyRec* curr = gFamilyHead;
     FamilyRec* prev = NULL;
 
@@ -197,95 +219,89 @@
     SkASSERT(!"Yikes, couldn't find family in our list to remove/delete");
 }
 
-//  gFamilyHeadAndNameListMutex must already be acquired
-static SkTypeface* find_typeface(const char name[], SkTypeface::Style style) {
-    NameFamilyPairList& namelist = GetNameList();
-    NameFamilyPair* list = namelist.begin();
-    int             count = namelist.count();
-
+static SkTypeface* findTypefaceLocked(const char name[], SkTypeface::Style style) {
+    int count = gNameList.count();
+    NameFamilyPair* list = gNameList.begin();
     int index = SkStrLCSearch(&list[0].fName, count, name, sizeof(list[0]));
-
     if (index >= 0) {
-        return find_best_face(list[index].fFamily, style);
+        return findBestFaceLocked(list[index].fFamily, style);
     }
     return NULL;
 }
 
-//  gFamilyHeadAndNameListMutex must already be acquired
-static SkTypeface* find_typeface(const SkTypeface* familyMember,
+static SkTypeface* findTypefaceLocked(const SkTypeface* familyMember,
                                  SkTypeface::Style style) {
-    const FamilyRec* family = find_family(familyMember);
-    return family ? find_best_face(family, style) : NULL;
+    const FamilyRec* family = findFamilyLocked(familyMember);
+    return family ? findBestFaceLocked(family, style) : NULL;
 }
 
-//  gFamilyHeadAndNameListMutex must already be acquired
-static void add_name(const char name[], FamilyRec* family) {
+static void addNameLocked(const char name[], FamilyRec* family) {
     SkAutoAsciiToLC tolc(name);
     name = tolc.lc();
 
-    NameFamilyPairList& namelist = GetNameList();
-    NameFamilyPair* list = namelist.begin();
-    int             count = namelist.count();
-
+    int count = gNameList.count();
+    NameFamilyPair* list = gNameList.begin();
     int index = SkStrLCSearch(&list[0].fName, count, name, sizeof(list[0]));
-
     if (index < 0) {
-        list = namelist.insert(~index);
+        list = gNameList.insert(~index);
         list->construct(name, family);
     }
 }
 
-//  gFamilyHeadAndNameListMutex must already be acquired
-static void remove_from_names(FamilyRec* emptyFamily) {
+static void removeFromNamesLocked(FamilyRec* emptyFamily) {
 #ifdef SK_DEBUG
     for (int i = 0; i < 4; i++) {
         SkASSERT(emptyFamily->fFaces[i] == NULL);
     }
 #endif
 
-    SkTDArray<NameFamilyPair>& list = GetNameList();
-
     // must go backwards when removing
-    for (int i = list.count() - 1; i >= 0; --i) {
-        NameFamilyPair* pair = &list[i];
-        if (pair->fFamily == emptyFamily) {
-            pair->destruct();
-            list.remove(i);
+    for (int i = gNameList.count() - 1; i >= 0; --i) {
+        NameFamilyPair& pair = gNameList[i];
+        if (pair.fFamily == emptyFamily) {
+            pair.destruct();
+            gNameList.remove(i);
         }
     }
 }
 
+// Store typeface in the family that familyMember belongs to; with no
+// familyMember, in a new family linked in at the head of the gFamilyHead list.
+static void addTypefaceLocked(SkTypeface* typeface, SkTypeface* familyMember) {
+    FamilyRec* family = familyMember ? findFamilyLocked(familyMember)
+                                     : SkNEW(FamilyRec);
+    SkASSERT(family);
+    if (NULL == familyMember) {
+        family->fNext = gFamilyHead;
+        gFamilyHead = family;
+    }
+    family->fFaces[typeface->style()] = typeface;
+}
+
+// Unregister typeface while holding gFamilyHeadAndNameListMutex. If that leaves
+// its family with no faces, the family's names and the family itself go too.
+static void removeTypeface(SkTypeface* typeface) {
+    SkAutoMutexAcquire lock(gFamilyHeadAndNameListMutex);
+    FamilyRec* emptied = removeFromFamilyLocked(typeface);
+    if (NULL == emptied) {
+        return;
+    }
+    removeFromNamesLocked(emptied);
+    detachAndDeleteFamilyLocked(emptied);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 class FamilyTypeface : public SkTypeface {
-public:
-    FamilyTypeface(Style style, bool sysFont, SkTypeface* familyMember,
-                   bool isFixedWidth)
+protected:
+    FamilyTypeface(Style style, bool sysFont, bool isFixedWidth)
     : SkTypeface(style, sk_atomic_inc(&gUniqueFontID) + 1, isFixedWidth) {
         fIsSysFont = sysFont;
-
-
-        // our caller has acquired the gFamilyHeadAndNameListMutex so this is safe
-        FamilyRec* rec = NULL;
-        if (familyMember) {
-            rec = find_family(familyMember);
-            SkASSERT(rec);
-        } else {
-            rec = SkNEW(FamilyRec);
-        }
-        rec->fFaces[style] = this;
     }
 
+public:
     virtual ~FamilyTypeface() {
-        SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
-
-        // remove us from our family. If the family is now empty, we return
-        // that and then remove that family from the name list
-        FamilyRec* family = remove_from_family(this);
-        if (NULL != family) {
-            remove_from_names(family);
-            detach_and_delete_family(family);
-        }
+        removeTypeface(this);
     }
 
     bool isSysFont() const { return fIsSysFont; }
@@ -304,13 +320,13 @@
 
 class StreamTypeface : public FamilyTypeface {
 public:
-    StreamTypeface(Style style, bool sysFont, SkTypeface* familyMember,
-                   SkStream* stream, bool isFixedWidth)
-    : INHERITED(style, sysFont, familyMember, isFixedWidth) {
+    StreamTypeface(Style style, bool sysFont, SkStream* stream, bool isFixedWidth)
+    : INHERITED(style, sysFont, isFixedWidth) {
         SkASSERT(stream);
         stream->ref();
         fStream = stream;
     }
+
     virtual ~StreamTypeface() {
         fStream->unref();
     }
@@ -335,15 +351,8 @@
 
 class FileTypeface : public FamilyTypeface {
 public:
-    FileTypeface(Style style, bool sysFont, SkTypeface* familyMember,
-                 const char path[], bool isFixedWidth)
-    : INHERITED(style, sysFont, familyMember, isFixedWidth) {
-        SkString fullpath;
-
-        if (sysFont) {
-            GetFullPathForSysFonts(&fullpath, path);
-            path = fullpath.c_str();
-        }
+    FileTypeface(Style style, bool sysFont, const char path[], bool isFixedWidth)
+    : INHERITED(style, sysFont, isFixedWidth) {
         fPath.set(path);
     }
 
@@ -383,29 +392,6 @@
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 
-static bool get_name_and_style(const char path[], SkString* name,
-                               SkTypeface::Style* style,
-                               bool* isFixedWidth, bool isExpected) {
-    SkString        fullpath;
-    GetFullPathForSysFonts(&fullpath, path);
-
-    SkMMAPStream stream(fullpath.c_str());
-    if (stream.getLength() > 0) {
-        return find_name_and_attributes(&stream, name, style, isFixedWidth);
-    }
-    else {
-        SkFILEStream stream(fullpath.c_str());
-        if (stream.getLength() > 0) {
-            return find_name_and_attributes(&stream, name, style, isFixedWidth);
-        }
-    }
-
-    if (isExpected) {
-        SkDebugf("---- failed to open <%s> as a font\n", fullpath.c_str());
-    }
-    return false;
-}
-
 // used to record our notion of the pre-existing fonts
 struct FontInitRec {
     const char*         fFileName;
@@ -420,32 +406,100 @@
     list of names (even if that list is empty), and the following members having
     null for the list. The names list must be NULL-terminated.
 */
-static FontInitRec *gSystemFonts;
-static size_t gNumSystemFonts = 0;
+static SkTDArray<FontInitRec> gSystemFonts;
+static SkTDArray<SkFontID> gFallbackFonts;
 
-#define SYSTEM_FONTS_FILE "/system/etc/system_fonts.cfg"
-
-// these globals are assigned (once) by load_system_fonts()
-static FamilyRec* gDefaultFamily;
-static SkTypeface* gDefaultNormal;
+// these globals are assigned (once) by initSystemFontsLocked()
+static FamilyRec* gDefaultFamily = NULL;
+static SkTypeface* gDefaultNormal = NULL;
 static char** gDefaultNames = NULL;
-static uint32_t *gFallbackFonts;
+
+// Debug aid: prints the default face/family, every gSystemFonts record and the
+// gFamilyHead chain through SkDebugf. Reads the font globals, so it is presumably
+// meant to run with the font-list mutex held, as the "Locked" suffix suggests.
+static void dumpGlobalsLocked() {
+    SkDebugf("gDefaultNormal=%p id=%u refCnt=%d", gDefaultNormal,
+             gDefaultNormal ? gDefaultNormal->uniqueID() : 0,
+             gDefaultNormal ? gDefaultNormal->getRefCnt() : 0);
+    if (gDefaultFamily) {
+        SkDebugf("gDefaultFamily=%p fFaces={%u,%u,%u,%u} refCnt={%d,%d,%d,%d}",
+                 gDefaultFamily,
+                 gDefaultFamily->fFaces[0] ? gDefaultFamily->fFaces[0]->uniqueID() : 0,
+                 gDefaultFamily->fFaces[1] ? gDefaultFamily->fFaces[1]->uniqueID() : 0,
+                 gDefaultFamily->fFaces[2] ? gDefaultFamily->fFaces[2]->uniqueID() : 0,
+                 gDefaultFamily->fFaces[3] ? gDefaultFamily->fFaces[3]->uniqueID() : 0,
+                 gDefaultFamily->fFaces[0] ? gDefaultFamily->fFaces[0]->getRefCnt() : 0,
+                 gDefaultFamily->fFaces[1] ? gDefaultFamily->fFaces[1]->getRefCnt() : 0,
+                 gDefaultFamily->fFaces[2] ? gDefaultFamily->fFaces[2]->getRefCnt() : 0,
+                 gDefaultFamily->fFaces[3] ? gDefaultFamily->fFaces[3]->getRefCnt() : 0);
+    } else {
+        SkDebugf("gDefaultFamily=%p", gDefaultFamily);
+    }
+
+    SkDebugf("gSystemFonts.count()=%d gFallbackFonts.count()=%d",
+            gSystemFonts.count(), gFallbackFonts.count());
+    for (int i = 0; i < gSystemFonts.count(); ++i) {
+        SkDebugf("gSystemFonts[%d] fileName=%s", i, gSystemFonts[i].fFileName);
+        unsigned namesIndex = 0;  // unsigned (not size_t) so it matches %u below
+        if (gSystemFonts[i].fNames)
+            for (const char* fontName = gSystemFonts[i].fNames[namesIndex];
+                    fontName != 0;
+                    fontName = gSystemFonts[i].fNames[++namesIndex]) {
+                SkDebugf("       name[%u]=%s", namesIndex, fontName);
+            }
+    }
+
+    if (gFamilyHead) {
+        FamilyRec* rec = gFamilyHead;
+        int i=0;
+        while (rec) {
+            SkDebugf("gFamilyHead[%d]=%p fFaces={%u,%u,%u,%u} refCnt={%d,%d,%d,%d}",
+                     i++, rec,
+                     rec->fFaces[0] ? rec->fFaces[0]->uniqueID() : 0,
+                     rec->fFaces[1] ? rec->fFaces[1]->uniqueID() : 0,
+                     rec->fFaces[2] ? rec->fFaces[2]->uniqueID() : 0,
+                     rec->fFaces[3] ? rec->fFaces[3]->uniqueID() : 0,
+                     rec->fFaces[0] ? rec->fFaces[0]->getRefCnt() : 0,
+                     rec->fFaces[1] ? rec->fFaces[1]->getRefCnt() : 0,
+                     rec->fFaces[2] ? rec->fFaces[2]->getRefCnt() : 0,
+                     rec->fFaces[3] ? rec->fFaces[3]->getRefCnt() : 0);
+            rec = rec->fNext;
+        }
+    } else {
+        SkDebugf("gFamilyHead=%p", gFamilyHead);
+    }
+}
+
+
+// Returns true when some gSystemFonts record has exactly this file name.
+static bool haveSystemFont(const char* filename) {
+    bool found = false;
+    for (int idx = 0; !found && idx < gSystemFonts.count(); ++idx) {
+        found = (0 == strcmp(gSystemFonts[idx].fFileName, filename));
+    }
+    return found;
+}
 
 /*  Load info from a configuration file that populates the system/fallback font structures
 */
-static void load_font_info() {
-//    load_font_info_xml("/system/etc/system_fonts.xml");
+static void loadFontInfoLocked() {
     SkTDArray<FontFamily*> fontFamilies;
     getFontFamilies(fontFamilies);
 
-    SkTDArray<FontInitRec> fontInfo;
-    bool firstInFamily = false;
+    gSystemFonts.reset();
+
     for (int i = 0; i < fontFamilies.count(); ++i) {
         FontFamily *family = fontFamilies[i];
-        firstInFamily = true;
         for (int j = 0; j < family->fFileNames.count(); ++j) {
+            const char* filename = family->fFileNames[j];
+            if (haveSystemFont(filename)) {
+                SkDebugf("---- system font and fallback font files specify a duplicate "
+                        "font %s, skipping the second occurrence", filename);
+                continue;
+            }
+
             FontInitRec fontInfoRecord;
-            fontInfoRecord.fFileName = family->fFileNames[j];
+            fontInfoRecord.fFileName = filename;
             if (j == 0) {
                 if (family->fNames.count() == 0) {
                     // Fallback font
@@ -470,46 +524,39 @@
             } else {
                 fontInfoRecord.fNames = NULL;
             }
-            *fontInfo.append() = fontInfoRecord;
+            *gSystemFonts.append() = fontInfoRecord;
         }
     }
-    gNumSystemFonts = fontInfo.count();
-    gSystemFonts = (FontInitRec*) malloc(gNumSystemFonts * sizeof(FontInitRec));
-    gFallbackFonts = (uint32_t*) malloc((gNumSystemFonts + 1) * sizeof(uint32_t));
-    if (gSystemFonts == NULL) {
-        // shouldn't get here
-        gNumSystemFonts = 0;
-    }
-//    SkDebugf("---- We have %d system fonts", gNumSystemFonts);
-    for (size_t i = 0; i < gNumSystemFonts; ++i) {
-        gSystemFonts[i].fFileName = fontInfo[i].fFileName;
-        gSystemFonts[i].fNames = fontInfo[i].fNames;
-//        SkDebugf("---- gSystemFonts[%d] fileName=%s", i, fontInfo[i].fFileName);
-    }
     fontFamilies.deleteAll();
+
+    SkDEBUGF(("---- We have %d system fonts", gSystemFonts.count()));
+    for (int i = 0; i < gSystemFonts.count(); ++i) {
+        SkDEBUGF(("---- gSystemFonts[%d] fileName=%s", i, gSystemFonts[i].fFileName));
+    }
 }
 
+
 /*
  *  Called once (ensured by the sentinel check at the beginning of our body).
  *  Initializes all the globals, and register the system fonts.
- *
- *  gFamilyHeadAndNameListMutex must already be acquired.
  */
-static void load_system_fonts() {
-    // check if we've already be called
-    if (NULL != gDefaultNormal) {
+static void initSystemFontsLocked() {
+    // check if we've already been called
+    if (gDefaultNormal) {
         return;
     }
 
-    load_font_info();
+    SkASSERT(gUniqueFontID == 0);
 
-    const FontInitRec* rec = gSystemFonts;
+    loadFontInfoLocked();
+
+    gFallbackFonts.reset();
+
     SkTypeface* firstInFamily = NULL;
-    int fallbackCount = 0;
-
-    for (size_t i = 0; i < gNumSystemFonts; i++) {
+    for (int i = 0; i < gSystemFonts.count(); i++) {
         // if we're the first in a new family, clear firstInFamily
-        if (rec[i].fNames != NULL) {
+        const char* const* names = gSystemFonts[i].fNames;
+        if (names != NULL) {
             firstInFamily = NULL;
         }
 
@@ -518,33 +565,38 @@
         SkTypeface::Style style;
 
         // we expect all the fonts, except the "fallback" fonts
-        bool isExpected = (rec[i].fNames != gFBNames);
-        if (!get_name_and_style(rec[i].fFileName, &name, &style,
-                                &isFixedWidth, isExpected)) {
+        bool isExpected = (names != gFBNames);
+        if (!getNameAndStyle(gSystemFonts[i].fFileName, &name, &style,
+                &isFixedWidth, isExpected)) {
+            // We need to increase gUniqueFontID here so that the unique id of
+            // each font matches its index in gSystemFonts array, as expected
+            // by findUniqueIDLocked.
+            sk_atomic_inc(&gUniqueFontID);
             continue;
         }
 
-        SkTypeface* tf = SkNEW_ARGS(FileTypeface,
-                                    (style,
-                                     true,  // system-font (cannot delete)
-                                     firstInFamily, // what family to join
-                                     rec[i].fFileName,
-                                     isFixedWidth) // filename
-                                    );
+        SkString fullpath;
+        getFullPathForSysFonts(&fullpath, gSystemFonts[i].fFileName);
 
-//        SkDebugf("---- SkTypeface[%d] %s fontID %d\n", i, rec[i].fFileName, tf->uniqueID());
+        SkTypeface* tf = SkNEW_ARGS(FileTypeface, (style,
+                true,  // system-font (cannot delete)
+                fullpath.c_str(), // filename
+                isFixedWidth));
+        addTypefaceLocked(tf, firstInFamily);
 
-        if (rec[i].fNames != NULL) {
+        SkDEBUGF(("---- SkTypeface[%d] %s fontID %d\n",
+                  i, gSystemFonts[i].fFileName, tf->uniqueID()));
+
+        if (names != NULL) {
             // see if this is one of our fallback fonts
-            if (rec[i].fNames == gFBNames) {
-//                SkDebugf("---- adding %s as fallback[%d] fontID %d\n",
-//                         rec[i].fFileName, fallbackCount, tf->uniqueID());
-                gFallbackFonts[fallbackCount++] = tf->uniqueID();
+            if (names == gFBNames) {
+                SkDEBUGF(("---- adding %s as fallback[%d] fontID %d\n",
+                        gSystemFonts[i].fFileName, gFallbackFonts.count(), tf->uniqueID()));
+                *gFallbackFonts.append() = tf->uniqueID();
             }
 
             firstInFamily = tf;
-            FamilyRec* family = find_family(tf);
-            const char* const* names = rec[i].fNames;
+            FamilyRec* family = findFamilyLocked(tf);
 
             // record the default family if this is it
             if (names == gDefaultNames) {
@@ -552,17 +604,113 @@
             }
             // add the names to map to this family
             while (*names) {
-                add_name(*names, family);
+                addNameLocked(*names, family);
                 names += 1;
             }
         }
     }
 
     // do this after all fonts are loaded. This is our default font, and it
-    // acts as a sentinel so we only execute load_system_fonts() once
-    gDefaultNormal = find_best_face(gDefaultFamily, SkTypeface::kNormal);
-    // now terminate our fallback list with the sentinel value
-    gFallbackFonts[fallbackCount] = 0;
+    // acts as a sentinel so we only execute initSystemFontsLocked() once
+    gDefaultNormal = findBestFaceLocked(gDefaultFamily, SkTypeface::kNormal);
+
+    SkDEBUGCODE(dumpGlobalsLocked());
+}
+
+// Maps a system font file name to its SkFontID; 0 means the name is unknown.
+// Assumes the i'th gSystemFonts entry has id i + 1 (system fonts load first).
+static SkFontID findUniqueIDLocked(const char* filename) {
+    SkFontID id = 0;
+    for (int idx = 0; 0 == id && idx < gSystemFonts.count(); ++idx) {
+        if (0 == strcmp(gSystemFonts[idx].fFileName, filename)) {
+            id = idx + 1;
+        }
+    }
+    return id;
+}
+
+// Returns the position of fontId in gFallbackFonts, or -1 if it is absent.
+// Linear scan from the front; the first match wins.
+static int findFallbackFontIndex(SkFontID fontId) {
+    const int count = gFallbackFonts.count();
+    int idx = 0;
+    for (; idx < count && gFallbackFonts[idx] != fontId; ++idx) {}
+    return idx < count ? idx : -1;
+}
+
+// Purges the font cache and rebuilds gFallbackFonts from the fallback font
+// configuration. Each family contributes at most its first usable file, and a
+// file qualifies only if it is already listed in gSystemFonts.
+static void reloadFallbackFontsLocked() {
+    SkGraphics::PurgeFontCache();
+    SkTDArray<FontFamily*> fallbackFamilies;
+    getFallbackFontFamilies(fallbackFamilies);
+
+    gFallbackFonts.reset();
+
+    for (int famIdx = 0; famIdx < fallbackFamilies.count(); ++famIdx) {
+        FontFamily* family = fallbackFamilies[famIdx];
+        for (int fileIdx = 0; fileIdx < family->fFileNames.count(); ++fileIdx) {
+            const char* filename = family->fFileNames[fileIdx];
+            if (!filename) {
+                continue;
+            }
+            if (!haveSystemFont(filename)) {
+                SkDebugf("---- skipping fallback font %s because it was not "
+                        "previously loaded as a system font", filename);
+                continue;
+            }
+
+            // ensure the fallback font exists before adding it to the list
+            bool isFixedWidth;
+            SkString name;
+            SkTypeface::Style style;
+            if (!getNameAndStyle(filename, &name, &style, &isFixedWidth, false)) {
+                continue;
+            }
+
+            const SkFontID uniqueID = findUniqueIDLocked(filename);
+            SkASSERT(uniqueID != 0);
+            if (findFallbackFontIndex(uniqueID) >= 0) {
+                SkDebugf("---- system font and fallback font files specify a duplicate "
+                        "font %s, skipping the second occurrence", filename);
+                continue;
+            }
+
+            SkDEBUGF(("---- reload %s as fallback[%d] fontID %d\n",
+                      filename, gFallbackFonts.count(), uniqueID));
+            *gFallbackFonts.append() = uniqueID;
+            break;  // The fallback set contains only the first font of each family
+        }
+    }
+
+    fallbackFamilies.deleteAll();
+}
+
+static void loadSystemFontsLocked() {  // load fonts once, then track locale changes
+#if !defined(SK_BUILD_FOR_ANDROID_NDK)
+    static char prevLanguage[3];  // 3 bytes: strncpy writes 2, the zeroed 3rd terminates
+    static char prevRegion[3];
+    char language[3] = "";
+    char region[3] = "";
+    // getLocale() is expected to fill in two-character codes (TODO confirm).
+    getLocale(language, region);
+    // First call: full init. Later calls: rebuild fallbacks only if the locale changed.
+    if (!gDefaultNormal) {
+        strncpy(prevLanguage, language, 2);
+        strncpy(prevRegion, region, 2);
+        initSystemFontsLocked();
+    } else if (strncmp(language, prevLanguage, 2) || strncmp(region, prevRegion, 2)) {
+        strncpy(prevLanguage, language, 2);
+        strncpy(prevRegion, region, 2);
+        reloadFallbackFontsLocked();
+    }
+#else
+    if (!gDefaultNormal) {  // NDK build: no locale tracking, load everything once
+        initSystemFontsLocked();
+        reloadFallbackFontsLocked();
+    }
+#endif
+}
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -608,8 +756,11 @@
 
 SkTypeface* SkFontHost::Deserialize(SkStream* stream) {
     SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
+    return deserializeLocked(stream);
+}
 
-    load_system_fonts();
+static SkTypeface* deserializeLocked(SkStream* stream) {
+    loadSystemFontsLocked();
 
     // check if the font is a custom or system font
     bool isCustomFont = stream->readBool();
@@ -623,7 +774,7 @@
         SkMemoryStream* fontStream = new SkMemoryStream(len);
         stream->read((void*)fontStream->getMemoryBase(), len);
 
-        SkTypeface* face = CreateTypefaceFromStream(fontStream);
+        SkTypeface* face = createTypefaceFromStreamLocked(fontStream);
 
         fontStream->unref();
 
@@ -639,15 +790,14 @@
             str.resize(len);
             stream->read(str.writable_str(), len);
 
-            const FontInitRec* rec = gSystemFonts;
-            for (size_t i = 0; i < gNumSystemFonts; i++) {
-                if (strcmp(rec[i].fFileName, str.c_str()) == 0) {
+            for (int i = 0; i < gSystemFonts.count(); i++) {
+                if (strcmp(gSystemFonts[i].fFileName, str.c_str()) == 0) {
                     // backup until we hit the fNames
                     for (int j = i; j >= 0; --j) {
-                        if (rec[j].fNames != NULL) {
-                            return SkFontHost::CreateTypeface(NULL,
-                                        rec[j].fNames[0], NULL, 0,
-                                        (SkTypeface::Style)style);
+                        if (gSystemFonts[j].fNames != NULL) {
+                            return createTypefaceLocked(NULL,
+                                    gSystemFonts[j].fNames[0], NULL, 0,
+                                    (SkTypeface::Style)style);
                         }
                     }
                 }
@@ -664,8 +814,13 @@
                                        const void* data, size_t bytelength,
                                        SkTypeface::Style style) {
     SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
+    return createTypefaceLocked(familyFace, familyName, data, bytelength, style);
+}
 
-    load_system_fonts();
+static SkTypeface* createTypefaceLocked(const SkTypeface* familyFace,
+        const char familyName[], const void* data, size_t bytelength,
+        SkTypeface::Style style) {
+    loadSystemFontsLocked();
 
     // clip to legal style bits
     style = (SkTypeface::Style)(style & SkTypeface::kBoldItalic);
@@ -673,25 +828,28 @@
     SkTypeface* tf = NULL;
 
     if (NULL != familyFace) {
-        tf = find_typeface(familyFace, style);
+        tf = findTypefaceLocked(familyFace, style);
     } else if (NULL != familyName) {
 //        SkDebugf("======= familyName <%s>\n", familyName);
-        tf = find_typeface(familyName, style);
+        tf = findTypefaceLocked(familyName, style);
     }
 
     if (NULL == tf) {
-        tf = find_best_face(gDefaultFamily, style);
+        tf = findBestFaceLocked(gDefaultFamily, style);
     }
 
-    // we ref(), since the symantic is to return a new instance
+    // we ref(), since the semantic is to return a new instance
     tf->ref();
     return tf;
 }
 
 SkStream* SkFontHost::OpenStream(uint32_t fontID) {
     SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
+    return openStreamLocked(fontID);
+}
 
-    FamilyTypeface* tf = (FamilyTypeface*)find_from_uniqueID(fontID);
+static SkStream* openStreamLocked(uint32_t fontID) {
+    FamilyTypeface* tf = (FamilyTypeface*)findFromUniqueIDLocked(fontID);
     SkStream* stream = tf ? tf->openStream() : NULL;
 
     if (stream && stream->getLength() == 0) {
@@ -704,8 +862,11 @@
 size_t SkFontHost::GetFileName(SkFontID fontID, char path[], size_t length,
                                int32_t* index) {
     SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
+    return getFileNameLocked(fontID, path, length, index);
+}
 
-    FamilyTypeface* tf = (FamilyTypeface*)find_from_uniqueID(fontID);
+static size_t getFileNameLocked(SkFontID fontID, char path[], size_t length, int32_t* index) {
+    FamilyTypeface* tf = (FamilyTypeface*)findFromUniqueIDLocked(fontID);
     const char* src = tf ? tf->getFilePath() : NULL;
 
     if (src) {
@@ -724,53 +885,67 @@
 
 SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
+    return nextLogicalFontLocked(currFontID, origFontID);
+}
 
-    load_system_fonts();
+static SkFontID nextLogicalFontLocked(SkFontID currFontID, SkFontID origFontID) {
+    loadSystemFontsLocked();
 
-    const SkTypeface* origTypeface = find_from_uniqueID(origFontID);
-    const SkTypeface* currTypeface = find_from_uniqueID(currFontID);
+    const SkTypeface* origTypeface = findFromUniqueIDLocked(origFontID);
+    const SkTypeface* currTypeface = findFromUniqueIDLocked(currFontID);
 
     SkASSERT(origTypeface != 0);
     SkASSERT(currTypeface != 0);
 
     // Our fallback list always stores the id of the plain in each fallback
     // family, so we transform currFontID to its plain equivalent.
-    currFontID = find_typeface(currTypeface, SkTypeface::kNormal)->uniqueID();
+    SkFontID plainFontID = findTypefaceLocked(currTypeface, SkTypeface::kNormal)->uniqueID();
 
     /*  First see if fontID is already one of our fallbacks. If so, return
         its successor. If fontID is not in our list, then return the first one
         in our list. Note: list is zero-terminated, and returning zero means
         we have no more fonts to use for fallbacks.
      */
-    const uint32_t* list = gFallbackFonts;
-    for (int i = 0; list[i] != 0; i++) {
-        if (list[i] == currFontID) {
-            if (list[i+1] == 0)
-                return 0;
-            const SkTypeface* nextTypeface = find_from_uniqueID(list[i+1]);
-            return find_typeface(nextTypeface, origTypeface->style())->uniqueID();
-        }
+    int plainFallbackFontIndex = findFallbackFontIndex(plainFontID);
+    int nextFallbackFontIndex = plainFallbackFontIndex + 1;
+    SkFontID nextFontID;
+    if (nextFallbackFontIndex == gFallbackFonts.count()) {
+        nextFontID = 0; // no more fallbacks
+    } else {
+        const SkTypeface* nextTypeface = findFromUniqueIDLocked(gFallbackFonts[nextFallbackFontIndex]);
+        nextFontID = findTypefaceLocked(nextTypeface, origTypeface->style())->uniqueID();
     }
 
-    // If we get here, currFontID was not a fallback, so we start at the
-    // beginning of our list.
-    const SkTypeface* firstTypeface = find_from_uniqueID(list[0]);
-    return find_typeface(firstTypeface, origTypeface->style())->uniqueID();
+    SkDEBUGF(("---- nextLogicalFont: currFontID=%d, origFontID=%d, plainFontID=%d, "
+            "plainFallbackFontIndex=%d, nextFallbackFontIndex=%d "
+            "=> nextFontID=%d", currFontID, origFontID, plainFontID,
+            plainFallbackFontIndex, nextFallbackFontIndex, nextFontID));
+    return nextFontID;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
 SkTypeface* SkFontHost::CreateTypefaceFromStream(SkStream* stream) {
+    SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
+    return createTypefaceFromStreamLocked(stream);
+}
+
+static SkTypeface* createTypefaceFromStreamLocked(SkStream* stream) {
     if (NULL == stream || stream->getLength() <= 0) {
         return NULL;
     }
 
+    // Make sure system fonts are loaded first to comply with the assumption
+    // that the font's uniqueID can be found using the findUniqueIDLocked method.
+    loadSystemFontsLocked();
+
     bool isFixedWidth;
     SkTypeface::Style style;
 
     if (find_name_and_attributes(stream, NULL, &style, &isFixedWidth)) {
-        SkAutoMutexAcquire  ac(gFamilyHeadAndNameListMutex);
-        return SkNEW_ARGS(StreamTypeface, (style, false, NULL, stream, isFixedWidth));
+        SkTypeface* typeface = SkNEW_ARGS(StreamTypeface, (style, false, stream, isFixedWidth));
+        addTypefaceLocked(typeface, NULL);
+        return typeface;
     } else {
         return NULL;
     }
diff --git a/src/ports/SkImageRef_ashmem.cpp b/src/ports/SkImageRef_ashmem.cpp
index f9c6aff..46ebb0d 100644
--- a/src/ports/SkImageRef_ashmem.cpp
+++ b/src/ports/SkImageRef_ashmem.cpp
@@ -41,6 +41,8 @@
     fRec.fPinned = false;
             
     fCT = NULL;
+            
+    this->useDefaultMutex();   // we don't need/want the shared imageref mutex
 }
 
 SkImageRef_ashmem::~SkImageRef_ashmem() {
@@ -235,6 +237,7 @@
         buffer.read(buf, length);
         setURI(buf, length);
     }
+    this->useDefaultMutex();   // we don't need/want the shared imageref mutex
 }
 
 SkPixelRef* SkImageRef_ashmem::Create(SkFlattenableReadBuffer& buffer) {
diff --git a/src/utils/SkCamera.cpp b/src/utils/SkCamera.cpp
index a387257..ac6fa0f 100644
--- a/src/utils/SkCamera.cpp
+++ b/src/utils/SkCamera.cpp
@@ -371,6 +371,18 @@
     fCamera.update();
     
 }
+
+SkScalar Sk3DView::getCameraLocationX() {
+    return fCamera.fLocation.fX / SkFloatToScalar(72.0f);  // NOTE(review): 72 presumably = units per inch; confirm
+}
+
+SkScalar Sk3DView::getCameraLocationY() {
+    return fCamera.fLocation.fY / SkFloatToScalar(72.0f);  // NOTE(review): 72 presumably = units per inch; confirm
+}
+
+SkScalar Sk3DView::getCameraLocationZ() {
+    return fCamera.fLocation.fZ / SkFloatToScalar(72.0f);  // NOTE(review): 72 presumably = units per inch; confirm
+}
 #endif
 
 void Sk3DView::translate(SkScalar x, SkScalar y, SkScalar z) {