diff --git a/bench/Android.mk b/bench/Android.mk
index 71523af..a0fe86c 100644
--- a/bench/Android.mk
+++ b/bench/Android.mk
@@ -3,17 +3,20 @@
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES := \
-	BitmapBench.cpp \
+  BenchGpuTimer_none.cpp \
+  BenchSysTimer_posix.cpp \
+  BenchTimer.cpp \
+  BitmapBench.cpp \
   DecodeBench.cpp \
   FPSBench.cpp \
   GradientBench.cpp \
   MatrixBench.cpp \
   PathBench.cpp \
-	RectBench.cpp \
-	RepeatTileBench.cpp \
-	TextBench.cpp \
-	SkBenchmark.cpp \
-	benchmain.cpp
+  RectBench.cpp \
+  RepeatTileBench.cpp \
+  TextBench.cpp \
+  SkBenchmark.cpp \
+  benchmain.cpp
 
 # additional optional class for this tool
 LOCAL_SRC_FILES += \
diff --git a/bench/BenchGpuTimer_gl.cpp b/bench/BenchGpuTimer_gl.cpp
new file mode 100644
index 0000000..ec2145d
--- /dev/null
+++ b/bench/BenchGpuTimer_gl.cpp
@@ -0,0 +1,181 @@
+#include "BenchGpuTimer_gl.h"
+#include <string.h>
+
+//GL
+#define BENCH_GL_FUNCTION_TYPE
+#if defined(SK_MESA)
+    #include <GL/osmesa.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != OSMesaGetCurrentContext())
+    
+    #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \
+            OSMesaGetProcAddress("gl" #F);
+    #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\
+            OSMesaGetProcAddress("gl" #F #S);
+
+#elif defined(SK_BUILD_FOR_WIN32)
+    #define WIN32_LEAN_AND_MEAN 1
+    #include <Windows.h>
+    #include <GL/GL.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != wglGetCurrentContext())
+    
+    #undef BENCH_GL_FUNCTION_TYPE
+    #define BENCH_GL_FUNCTION_TYPE __stdcall
+
+    #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \
+            wglGetProcAddress("gl" #F);
+    #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\
+            wglGetProcAddress("gl" #F #S);
+    
+#elif defined(SK_BUILD_FOR_MAC)
+    #include <OpenGL/gl.h>
+    #include <OpenGL/CGLCurrent.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != CGLGetCurrentContext())
+    
+#elif defined(SK_BUILD_FOR_UNIX)
+    #include <GL/gl.h>
+    #include <GL/glx.h>
+    #define SK_BENCH_CONTEXT_CHECK (NULL != glXGetCurrentContext())
+    
+    #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \
+            glXGetProcAddressARB(reinterpret_cast<const GLubyte*>("gl" #F));
+    #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\
+            glXGetProcAddressARB(reinterpret_cast<const GLubyte*>("gl" #F #S));
+#else
+    #error unsupported platform
+#endif
+
+#define BenchGL_TIME_ELAPSED 0x88BF
+#define BenchGL_QUERY_RESULT 0x8866
+#define BenchGL_QUERY_RESULT_AVAILABLE 0x8867
+
+#if defined(SK_BUILD_FOR_WIN32)
+typedef UINT64 BenchGLuint64;
+#else
+#include <stdint.h>
+typedef uint64_t BenchGLuint64;
+#endif
+
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGenQueriesProc) (GLsizei n, GLuint *ids);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLBeginQueryProc) (GLenum target, GLuint id);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLEndQueryProc) (GLenum target);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLDeleteQueriesProc) (GLsizei n, const GLuint *ids);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGetQueryObjectivProc) (GLuint id, GLenum pname, GLint *params);
+typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGetQueryObjectui64vProc) (GLuint id, GLenum pname, BenchGLuint64 *params);
+
+struct BenchGLInterface {  // GL timer-query entry points plus an availability flag
+    bool fHasTimer;  // checked by the BenchGpuTimer methods before any call below
+    BenchGLGenQueriesProc fGenQueries;
+    BenchGLBeginQueryProc fBeginQuery;
+    BenchGLEndQueryProc fEndQuery;
+    BenchGLDeleteQueriesProc fDeleteQueries;
+    BenchGLGetQueryObjectivProc fGetQueryObjectiv;
+    BenchGLGetQueryObjectui64vProc fGetQueryObjectui64v;  // 64-bit result query
+};
+
+// Returns true if 'ext' appears as a whole space-separated token in
+// 'extensionString'; a NULL list (glGetString failure) matches nothing.
+static bool BenchGLCheckExtension(const char* ext,
+                                  const char* extensionString) {
+    if (NULL == ext || NULL == extensionString) return false;
+    const size_t extLength = strlen(ext);
+    while (true) {
+        const size_t n = strcspn(extensionString, " ");
+        if (n == extLength && 0 == strncmp(ext, extensionString, n)) {
+            return true;
+        }
+        if (0 == extensionString[n]) {
+            return false;
+        }
+        extensionString += n + 1;
+    }
+}
+
+static BenchGLInterface gBenchGL;
+static bool gBenchGLInterfaceInit = false;
+
+// Resolves the GL timer-query entry points once, the first time it is called
+// with a current GL context. Later calls keep the cached result; the timer is
+// only reported as available when every entry point was actually resolved.
+static void BenchGLSetDefaultGLInterface() {
+    if (gBenchGLInterfaceInit || !SK_BENCH_CONTEXT_CHECK) return;
+    gBenchGL.fHasTimer = false;
+    const char* glExts =
+        reinterpret_cast<const char*>(glGetString(GL_EXTENSIONS));
+    // glGetString() returns NULL on error; treat that as "no extensions".
+    const bool ext = glExts && BenchGLCheckExtension("GL_EXT_timer_query", glExts);
+    const bool arb = glExts && BenchGLCheckExtension("GL_ARB_timer_query", glExts);
+    if (ext || arb) {
+#if defined(SK_BUILD_FOR_MAC)
+        #if GL_EXT_timer_query || GL_ARB_timer_query
+        gBenchGL.fHasTimer = true;
+        gBenchGL.fGenQueries = glGenQueries;
+        gBenchGL.fBeginQuery = glBeginQuery;
+        gBenchGL.fEndQuery = glEndQuery;
+        gBenchGL.fDeleteQueries = glDeleteQueries;
+        gBenchGL.fGetQueryObjectiv = glGetQueryObjectiv;
+        #endif
+        #if GL_ARB_timer_query
+        gBenchGL.fGetQueryObjectui64v = glGetQueryObjectui64v;
+        #elif GL_EXT_timer_query
+        gBenchGL.fGetQueryObjectui64v = glGetQueryObjectui64vEXT;
+        #endif
+#else
+        SK_GL_GET_PROC(GenQueries)
+        SK_GL_GET_PROC(BeginQuery)
+        SK_GL_GET_PROC(EndQuery)
+        SK_GL_GET_PROC(DeleteQueries)
+        SK_GL_GET_PROC(GetQueryObjectiv)
+        if (arb) { SK_GL_GET_PROC(GetQueryObjectui64v) }
+        else { SK_GL_GET_PROC_SUFFIX(GetQueryObjectui64v, EXT) }
+        // A failed lookup yields NULL; never call through a missing proc.
+        gBenchGL.fHasTimer = gBenchGL.fGenQueries && gBenchGL.fBeginQuery
+                && gBenchGL.fEndQuery && gBenchGL.fDeleteQueries
+                && gBenchGL.fGetQueryObjectiv && gBenchGL.fGetQueryObjectui64v;
+#endif
+    }
+    gBenchGLInterfaceInit = true;
+}
+
+// Zero-initializes both members so neither is ever read uninitialized, then
+// generates the GL query object when timer queries are available.
+BenchGpuTimer::BenchGpuTimer() : fQuery(0), fStarted(false) {
+    BenchGLSetDefaultGLInterface();
+    if (gBenchGL.fHasTimer) gBenchGL.fGenQueries(1, &this->fQuery);
+}
+
+// Frees the query object; does nothing when timer queries are unavailable.
+BenchGpuTimer::~BenchGpuTimer() {
+    if (!gBenchGL.fHasTimer) return;
+    gBenchGL.fDeleteQueries(1, &fQuery);
+}
+
+// Opens a GL TIME_ELAPSED query; a no-op when timer queries are unavailable.
+void BenchGpuTimer::startGpu() {
+    if (!gBenchGL.fHasTimer) { return; }
+    fStarted = true;
+    gBenchGL.fBeginQuery(BenchGL_TIME_ELAPSED, fQuery);
+}
+
+/**
+ * Closes the elapsed-time query and returns its result divided by 1000000
+ * (0 when timer queries are unavailable). Stop the CPU clocks before calling:
+ * this spins on the CPU until the GPU has produced the query result.
+ */
+double BenchGpuTimer::endGpu() {
+    if (!gBenchGL.fHasTimer) {
+        return 0;
+    }
+    fStarted = false;
+    gBenchGL.fEndQuery(BenchGL_TIME_ELAPSED);
+
+    // Poll until the driver reports that the result can be read.
+    GLint ready = 0;
+    do {
+        gBenchGL.fGetQueryObjectiv(fQuery, BenchGL_QUERY_RESULT_AVAILABLE,
+                                   &ready);
+    } while (!ready);
+
+    BenchGLuint64 elapsed = 0;
+    gBenchGL.fGetQueryObjectui64v(fQuery, BenchGL_QUERY_RESULT, &elapsed);
+    return elapsed / 1000000.0;
+}
diff --git a/bench/BenchGpuTimer_gl.h b/bench/BenchGpuTimer_gl.h
new file mode 100644
index 0000000..ac23482
--- /dev/null
+++ b/bench/BenchGpuTimer_gl.h
@@ -0,0 +1,33 @@
+#ifndef SkBenchGpuTimer_DEFINED
+#define SkBenchGpuTimer_DEFINED
+
+#if defined(SK_MESA)
+    #include <GL/osmesa.h>
+
+#elif defined(SK_BUILD_FOR_WIN32)
+    #define WIN32_LEAN_AND_MEAN 1
+    #include <Windows.h>
+    #include <GL/GL.h>
+    
+#elif defined(SK_BUILD_FOR_MAC)
+    #include <OpenGL/gl.h>
+    
+#elif defined(SK_BUILD_FOR_UNIX)
+    #include <GL/gl.h>
+
+#else
+    #error unsupported platform
+#endif
+
+class BenchGpuTimer {  // GL-backed GPU timer; one query object per instance
+public:
+    BenchGpuTimer();
+    ~BenchGpuTimer();
+    void startGpu();
+    double endGpu();
+private:
+    GLuint fQuery;  // GL query object name used for the timing query
+    int fStarted;  // flag toggled by the start/end calls
+};
+
+#endif
diff --git a/bench/BenchGpuTimer_none.cpp b/bench/BenchGpuTimer_none.cpp
new file mode 100644
index 0000000..0dba6d7
--- /dev/null
+++ b/bench/BenchGpuTimer_none.cpp
@@ -0,0 +1,14 @@
+#include "BenchGpuTimer_none.h"
+
+BenchGpuTimer::BenchGpuTimer() {  // null implementation: nothing to set up
+}
+
+BenchGpuTimer::~BenchGpuTimer() {  // nothing to release
+}
+
+void BenchGpuTimer::startGpu() {  // no-op: this variant measures nothing
+}
+
+double BenchGpuTimer::endGpu() {
+    return -1.0;  // constant negative result; no measurement is taken here
+}
diff --git a/bench/BenchGpuTimer_none.h b/bench/BenchGpuTimer_none.h
new file mode 100644
index 0000000..7069ca4
--- /dev/null
+++ b/bench/BenchGpuTimer_none.h
@@ -0,0 +1,12 @@
+#ifndef SkBenchGpuTimer_DEFINED
+#define SkBenchGpuTimer_DEFINED
+
+class BenchGpuTimer {  // stateless stand-in with the GPU timer interface
+public:
+    BenchGpuTimer();
+    ~BenchGpuTimer();
+    void startGpu();
+    double endGpu();
+};
+
+#endif
diff --git a/bench/BenchSysTimer_c.cpp b/bench/BenchSysTimer_c.cpp
new file mode 100644
index 0000000..fc0850b
--- /dev/null
+++ b/bench/BenchSysTimer_c.cpp
@@ -0,0 +1,20 @@
+#include "BenchSysTimer_c.h"
+
+//Time
+#include <time.h>
+
+void BenchSysTimer::startWall() {  // mark the calendar-time start
+    this->fStartWall = time(NULL);  // time() requires its out-pointer argument
+}
+void BenchSysTimer::startCpu() {  // mark the processor-time start
+    this->start_cpu = clock();  // member is named start_cpu in BenchSysTimer_c.h
+}
+
+double BenchSysTimer::endCpu() {  // CPU msecs elapsed since startCpu()
+    clock_t end_cpu = clock();
+    return (end_cpu - this->start_cpu) * 1000.0 / CLOCKS_PER_SEC;  // ticks -> msecs
+}
+double BenchSysTimer::endWall() {  // wall-clock msecs elapsed since startWall()
+    time_t end_wall = time(NULL);
+    return difftime(end_wall, this->fStartWall) * 1000.0;  // difftime() yields seconds
+}
diff --git a/bench/BenchSysTimer_c.h b/bench/BenchSysTimer_c.h
new file mode 100644
index 0000000..c598f30
--- /dev/null
+++ b/bench/BenchSysTimer_c.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#include <time.h>
+#warning standard clocks
+
+class BenchSysTimer {  // fallback system timer built on <time.h>
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    clock_t start_cpu;  // CPU-clock start mark
+    time_t fStartWall;  // wall-clock start mark
+};
+
+#endif
diff --git a/bench/BenchSysTimer_mach.cpp b/bench/BenchSysTimer_mach.cpp
new file mode 100644
index 0000000..b23897c
--- /dev/null
+++ b/bench/BenchSysTimer_mach.cpp
@@ -0,0 +1,69 @@
+#include "BenchSysTimer_mach.h"
+
+//Time
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+
+// Returns the summed user + system CPU time reported by the
+// TASK_THREAD_TIMES_INFO query, or {0, 0} if the query fails.
+static time_value_t macCpuTime() {
+    const time_value_t none = {0, 0};
+    mach_port_t task = mach_task_self();
+    if (task == MACH_PORT_NULL) {
+        return none;
+    }
+    task_thread_times_info thread_info_data;
+    mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT;
+    if (KERN_SUCCESS != task_info(task,
+                 TASK_THREAD_TIMES_INFO,
+                 reinterpret_cast<task_info_t>(&thread_info_data),
+                 &thread_info_count))
+    {
+        return none;
+    }
+    // Accumulate system_time into user_time, then return the sum.
+    time_value_add(&thread_info_data.user_time, &thread_info_data.system_time);
+    return thread_info_data.user_time;
+}
+
+// Elapsed time from start_clock to end_clock, in milliseconds.
+static double intervalInMSec(const time_value_t start_clock
+                           , const time_value_t end_clock)
+{
+    // A negative microsecond difference borrows one second (1000000 usec).
+    const bool borrow =
+        (end_clock.microseconds - start_clock.microseconds) < 0;
+
+    double msecs = borrow
+        ? (end_clock.seconds - start_clock.seconds-1)*1000
+        : (end_clock.seconds - start_clock.seconds)*1000;
+    msecs += borrow
+        ? (1000000 + end_clock.microseconds - start_clock.microseconds) / 1000.0
+        : (end_clock.microseconds - start_clock.microseconds) / 1000.0;
+    return msecs;
+}
+
+void BenchSysTimer::startWall() {  // mark the mach_absolute_time() start
+    fStartWall = mach_absolute_time();
+}
+void BenchSysTimer::startCpu() {  // mark the task CPU-time start
+    fStartCpu = macCpuTime();
+}
+
+double BenchSysTimer::endCpu() {  // CPU msecs elapsed since startCpu()
+    const time_value_t now = macCpuTime();
+    return intervalInMSec(fStartCpu, now);
+}
+double BenchSysTimer::endWall() {  // wall-clock msecs since startWall()
+    const uint64_t end_wall = mach_absolute_time();
+    const uint64_t elapsed = end_wall - this->fStartWall;
+    // mach_absolute_time() ticks scale to nanoseconds by numer / denom.
+    mach_timebase_info_data_t sTimebaseInfo;
+    if (KERN_SUCCESS != mach_timebase_info(&sTimebaseInfo)) {
+        return 0;
+    }
+    const uint64_t elapsedNano = elapsed * sTimebaseInfo.numer
+                                 / sTimebaseInfo.denom;
+    // Divide as double so sub-millisecond precision is not truncated away.
+    return elapsedNano / 1000000.0;
+}
diff --git a/bench/BenchSysTimer_mach.h b/bench/BenchSysTimer_mach.h
new file mode 100644
index 0000000..da4fff0
--- /dev/null
+++ b/bench/BenchSysTimer_mach.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+
+class BenchSysTimer {  // system timer built on the Mach time APIs
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    time_value_t fStartCpu;  // CPU-time mark recorded by startCpu()
+    uint64_t fStartWall;  // mach_absolute_time() value recorded by startWall()
+};
+
+#endif
diff --git a/bench/BenchSysTimer_posix.cpp b/bench/BenchSysTimer_posix.cpp
new file mode 100644
index 0000000..5d28f40
--- /dev/null
+++ b/bench/BenchSysTimer_posix.cpp
@@ -0,0 +1,50 @@
+#include "BenchSysTimer_posix.h"
+
+//Time
+#include <time.h>
+
+// Elapsed time from start_clock to end_clock, in milliseconds.
+static double intervalInMSec(const timespec start_clock
+                           , const timespec end_clock)
+{
+    const bool borrow = (end_clock.tv_nsec - start_clock.tv_nsec) < 0;
+    // A negative nanosecond difference borrows one second (1e9 nsec).
+    double msecs = borrow
+        ? (end_clock.tv_sec - start_clock.tv_sec-1)*1000
+        : (end_clock.tv_sec - start_clock.tv_sec)*1000;
+    msecs += borrow
+        ? (1000000000 + end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0
+        : (end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0;
+    return msecs;
+}
+
+void BenchSysTimer::startWall() {  // mark the CLOCK_MONOTONIC start time
+    if (clock_gettime(CLOCK_MONOTONIC, &fWall) == -1) {
+        fWall.tv_sec = 0;  // clock unavailable: fall back to a zeroed mark
+        fWall.tv_nsec = 0;
+    }
+}
+void BenchSysTimer::startCpu() {  // mark the process CPU-time start
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &fCpu) == -1) {
+        fCpu.tv_sec = 0;  // clock unavailable: fall back to a zeroed mark
+        fCpu.tv_nsec = 0;
+    }
+}
+
+double BenchSysTimer::endCpu() {  // CPU msecs elapsed since startCpu()
+    timespec now;
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now) == -1) {
+        now.tv_sec = 0;  // clock unavailable: use a zeroed end mark
+        now.tv_nsec = 0;
+    }
+    return intervalInMSec(fCpu, now);
+}
+
+double BenchSysTimer::endWall() {  // wall-clock msecs elapsed since startWall()
+    timespec now;
+    if (clock_gettime(CLOCK_MONOTONIC, &now) == -1) {
+        now.tv_sec = 0;  // clock unavailable: use a zeroed end mark
+        now.tv_nsec = 0;
+    }
+    return intervalInMSec(fWall, now);
+}
diff --git a/bench/BenchSysTimer_posix.h b/bench/BenchSysTimer_posix.h
new file mode 100644
index 0000000..09dfb0e
--- /dev/null
+++ b/bench/BenchSysTimer_posix.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#include <time.h>
+
+class BenchSysTimer {  // system timer built on POSIX clock_gettime()
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    timespec fCpu;  // CLOCK_PROCESS_CPUTIME_ID reading taken by startCpu()
+    timespec fWall;  // CLOCK_MONOTONIC reading taken by startWall()
+};
+
+#endif
+
diff --git a/bench/BenchSysTimer_windows.cpp b/bench/BenchSysTimer_windows.cpp
new file mode 100644
index 0000000..923754c
--- /dev/null
+++ b/bench/BenchSysTimer_windows.cpp
@@ -0,0 +1,55 @@
+#include "BenchSysTimer_windows.h"
+
+//Time
+#define WIN32_LEAN_AND_MEAN 1
+#include <Windows.h>
+
+// Process CPU time (kernel + user) in FILETIME ticks; 0 if the query fails.
+static ULONGLONG winCpuTime() {
+    FILETIME created;
+    FILETIME exited;
+    FILETIME kernel;
+    FILETIME user;
+    if (0 == GetProcessTimes(GetCurrentProcess(), &created, &exited,
+                             &kernel, &user)) {
+        return 0;
+    }
+    // FILETIME splits its 64-bit value into two DWORDs; rejoin each one.
+    ULARGE_INTEGER kernelTicks;
+    kernelTicks.LowPart  = kernel.dwLowDateTime;
+    kernelTicks.HighPart = kernel.dwHighDateTime;
+    ULARGE_INTEGER userTicks;
+    userTicks.LowPart  = user.dwLowDateTime;
+    userTicks.HighPart = user.dwHighDateTime;
+    return kernelTicks.QuadPart + userTicks.QuadPart;
+}
+
+void BenchSysTimer::startWall() {  // capture the performance-counter start
+    if (!::QueryPerformanceCounter(&fStartWall)) {
+        fStartWall.QuadPart = 0;  // counter unavailable: use a zero mark
+    }
+}
+void BenchSysTimer::startCpu() {  // capture the process CPU-time start
+    fStartCpu = winCpuTime();
+}
+
+double BenchSysTimer::endCpu() {  // CPU msecs elapsed since startCpu()
+    ULONGLONG end_cpu = winCpuTime();
+    return (end_cpu - this->fStartCpu) / 10000.0;  // 100ns ticks -> msecs, keeping fractions
+}
+double BenchSysTimer::endWall() {  // wall-clock msecs since startWall()
+    LARGE_INTEGER now;
+    if (0 == ::QueryPerformanceCounter(&now)) {
+        now.QuadPart = 0;
+    }
+
+    LARGE_INTEGER elapsedTicks;
+    elapsedTicks.QuadPart = now.QuadPart - fStartWall.QuadPart;
+
+    // The counter frequency is in ticks per second; scale to milliseconds.
+    LARGE_INTEGER ticksPerSec;
+    if (0 == ::QueryPerformanceFrequency(&ticksPerSec)) {
+        return 0;
+    }
+    return (double)elapsedTicks.QuadPart / ticksPerSec.QuadPart * 1000;
+}
diff --git a/bench/BenchSysTimer_windows.h b/bench/BenchSysTimer_windows.h
new file mode 100644
index 0000000..72a3fb2
--- /dev/null
+++ b/bench/BenchSysTimer_windows.h
@@ -0,0 +1,19 @@
+#ifndef SkBenchSysTimer_DEFINED
+#define SkBenchSysTimer_DEFINED
+
+//Time
+#define WIN32_LEAN_AND_MEAN 1
+#include <Windows.h>
+
+class BenchSysTimer {  // `class` matches the forward declaration in BenchTimer.h
+public:
+    void startWall();
+    void startCpu();
+    double endCpu();
+    double endWall();
+private:
+    ULONGLONG fStartCpu;  // winCpuTime() value recorded by startCpu()
+    LARGE_INTEGER fStartWall;  // performance-counter value recorded by startWall()
+};
+
+#endif
diff --git a/bench/BenchTimer.cpp b/bench/BenchTimer.cpp
new file mode 100644
index 0000000..e7b0068
--- /dev/null
+++ b/bench/BenchTimer.cpp
@@ -0,0 +1,48 @@
+#include "BenchTimer.h"
+#if defined(SK_BUILD_FOR_WIN32)
+    #include "BenchSysTimer_windows.h"
+#elif defined(SK_BUILD_FOR_MAC)
+    #include "BenchSysTimer_mach.h"
+#elif defined(SK_BUILD_FOR_UNIX)
+    #include "BenchSysTimer_posix.h"
+#else
+    #include "BenchSysTimer_c.h"
+#endif
+
+#if defined(SK_MESA) || \
+    defined(SK_BUILD_FOR_WIN32) || \
+    defined(SK_BUILD_FOR_MAC) || \
+    defined(SK_BUILD_FOR_UNIX)
+    #include "BenchGpuTimer_gl.h"
+
+#else
+    #include "BenchGpuTimer_none.h"
+#endif
+
+// Owns one system timer and one GPU timer; results read -1.0 until end().
+BenchTimer::BenchTimer()
+        : fCpu(-1.0)
+        , fWall(-1.0)
+        , fGpu(-1.0)
+        , fSysTimer(new BenchSysTimer())
+        , fGpuTimer(new BenchGpuTimer())
+{}
+
+BenchTimer::~BenchTimer() {  // releases both owned timers
+    delete fSysTimer;
+    delete fGpuTimer;
+}
+
+void BenchTimer::start() {  // wall clock first, GPU query next, CPU clock last
+    fSysTimer->startWall();
+    fGpuTimer->startGpu();
+    fSysTimer->startCpu();
+}
+
+void BenchTimer::end() {
+    // Stop the CPU clock before anything else: endGpu() makes the CPU wait
+    // for the GPU, and that wait must not be charged as CPU time.
+    fCpu = fSysTimer->endCpu();
+    fGpu = fGpuTimer->endGpu();
+    fWall = fSysTimer->endWall();
+}
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
new file mode 100644
index 0000000..eae82d5
--- /dev/null
+++ b/bench/BenchTimer.h
@@ -0,0 +1,27 @@
+#ifndef SkBenchTimer_DEFINED
+#define SkBenchTimer_DEFINED
+
+class BenchSysTimer;
+class BenchGpuTimer;
+
+/**
+ * SysTimers and GpuTimers are implemented orthogonally.
+ * This class combines a SysTimer and a GpuTimer into one single,
+ * platform specific, Timer with a simple interface.
+ */
+class BenchTimer {
+public:
+    BenchTimer();
+    ~BenchTimer();
+    void start();
+    void end();  // stores the three results below
+    double fCpu;  // CPU result from end(); -1.0 until end() has run
+    double fWall;  // wall-clock result from end(); -1.0 until end() has run
+    double fGpu;  // GPU result from end(); -1.0 until end() has run
+    
+private:
+    BenchSysTimer *fSysTimer;  // owned: allocated in the ctor, deleted in the dtor
+    BenchGpuTimer *fGpuTimer;  // owned likewise; NOTE(review): class has no copy control
+};
+
+#endif
diff --git a/bench/MatrixBench.cpp b/bench/MatrixBench.cpp
index d963bc7..dce0358 100644
--- a/bench/MatrixBench.cpp
+++ b/bench/MatrixBench.cpp
@@ -67,11 +67,10 @@
 class ScaleMatrixBench : public MatrixBench {
 public:
     ScaleMatrixBench(void* param) : INHERITED(param, "scale") {
-
+        fSX = fSY = SkFloatToScalar(1.5f);
         fM0.reset();
         fM1.setScale(fSX, fSY);
         fM2.setTranslate(fSX, fSY);
-        fSX = fSY = SkFloatToScalar(1.5f);
     }
 protected:
     virtual void performTest() {
@@ -215,6 +214,89 @@
     typedef MatrixBench INHERITED;
 };
 
+#ifdef SK_SCALAR_IS_FLOAT
+class ScaleTransMixedMatrixBench : public MatrixBench {  // scale+translate of 16 points via SkScalarMulAdd
+ public:
+    ScaleTransMixedMatrixBench(void* p) : INHERITED(p, "scaletrans_mixed"), fCount (16) {
+        fMatrix.setAll(fRandom.nextS(), fRandom.nextS(), fRandom.nextS(),
+                       fRandom.nextS(), fRandom.nextS(), fRandom.nextS(),
+                       fRandom.nextS(), fRandom.nextS(), fRandom.nextS());
+        int i;
+        for (i = 0; i < fCount; i++) {  // seed every source and destination point
+            fSrc[i].fX = fRandom.nextS();
+            fSrc[i].fY = fRandom.nextS();
+            fDst[i].fX = fRandom.nextS();
+            fDst[i].fY = fRandom.nextS();
+        }
+    }
+ protected:
+    virtual void performTest() {
+        SkPoint* dst = fDst;
+        const SkPoint* src = fSrc;
+        int count = fCount;
+        float mx = fMatrix[SkMatrix::kMScaleX];
+        float my = fMatrix[SkMatrix::kMScaleY];
+        float tx = fMatrix[SkMatrix::kMTransX];
+        float ty = fMatrix[SkMatrix::kMTransY];
+        do {  // the body runs before the test, so count must be >= 1 (fCount is 16)
+            dst->fY = SkScalarMulAdd(src->fY, my, ty);
+            dst->fX = SkScalarMulAdd(src->fX, mx, tx);
+            src += 1;
+            dst += 1;
+        } while (--count);
+    }
+ private:
+    SkMatrix fMatrix;
+    SkPoint fSrc [16];
+    SkPoint fDst [16];
+    int fCount;  // number of points processed; matches the array sizes above
+    SkRandom fRandom;
+    typedef MatrixBench INHERITED;
+};
+
+
+class ScaleTransDoubleMatrixBench : public MatrixBench {  // same transform, matrix stored as double[9]
+ public:
+    ScaleTransDoubleMatrixBench(void* p) : INHERITED(p, "scaletrans_double"), fCount (16) {
+        init9(fMatrix);
+        int i;
+        for (i = 0; i < fCount; i++) {  // seed every source and destination point
+            fSrc[i].fX = fRandom.nextS();
+            fSrc[i].fY = fRandom.nextS();
+            fDst[i].fX = fRandom.nextS();
+            fDst[i].fY = fRandom.nextS();
+        }
+    }
+ protected:
+    virtual void performTest() {
+        SkPoint* dst = fDst;
+        const SkPoint* src = fSrc;
+        int count = fCount;
+        // As doubles, on Z600 Linux systems this is 2.5x as expensive as mixed mode
+        float mx = fMatrix[SkMatrix::kMScaleX];  // NOTE(review): double narrowed to float here - confirm intent
+        float my = fMatrix[SkMatrix::kMScaleY];
+        float tx = fMatrix[SkMatrix::kMTransX];
+        float ty = fMatrix[SkMatrix::kMTransY];
+        do {  // the body runs before the test, so count must be >= 1 (fCount is 16)
+            dst->fY = src->fY * my + ty;
+            dst->fX = src->fX * mx + tx;
+            src += 1;
+            dst += 1;
+        } while (--count);
+    }
+ private:
+    double fMatrix [9];
+    SkPoint fSrc [16];
+    SkPoint fDst [16];
+    int fCount;  // number of points processed; matches the array sizes above
+    SkRandom fRandom;
+    typedef MatrixBench INHERITED;
+};
+#endif
+
+
+
+
 
 static SkBenchmark* M0(void* p) { return new EqualsMatrixBench(p); }
 static SkBenchmark* M1(void* p) { return new ScaleMatrixBench(p); }
@@ -227,3 +309,10 @@
 static BenchRegistry gReg2(M2);
 static BenchRegistry gReg3(M3);
 static BenchRegistry gReg4(M4);
+
+#ifdef SK_SCALAR_IS_FLOAT
+static SkBenchmark* FlM0(void* p) { return new ScaleTransMixedMatrixBench(p); }  // factory
+static SkBenchmark* FlM1(void* p) { return new ScaleTransDoubleMatrixBench(p); }  // factory
+static BenchRegistry gFlReg5(FlM0);  // registers the mixed-mode bench
+static BenchRegistry gFlReg6(FlM1);  // registers the double-matrix bench
+#endif
diff --git a/bench/ScalarBench.cpp b/bench/ScalarBench.cpp
new file mode 100644
index 0000000..29fe5c4
--- /dev/null
+++ b/bench/ScalarBench.cpp
@@ -0,0 +1,97 @@
+#include "SkBenchmark.h"
+#include "SkFloatBits.h"
+#include "SkRandom.h"
+#include "SkString.h"
+
+class ScalarBench : public SkBenchmark {  // base class: runs performTest() in a fixed-count loop
+    SkString    fName;  // "scalar_" + the name given to the constructor
+    enum { N = 100000 };  // base iteration count per onDraw()
+public:
+    ScalarBench(void* param, const char name[]) : INHERITED(param) {
+        fName.printf("scalar_%s", name);
+    }
+
+    virtual void performTest() = 0;  // the operation being timed
+
+protected:
+    virtual int mulLoopCount() const { return 1; }  // multiplier applied to N
+
+    virtual const char* onGetName() {
+        return fName.c_str();
+    }
+
+    virtual void onDraw(SkCanvas* canvas) {  // canvas is not used
+        int n = N * this->mulLoopCount();
+        for (int i = 0; i < n; i++) {
+            this->performTest();
+        }
+    }
+
+private:
+    typedef SkBenchmark INHERITED;
+};
+
+// we want to stop the compiler from eliminating code that it thinks is a no-op
+// so we have a non-static global we increment, hoping that will convince the
+// compiler to execute everything
+int gScalarBench_NonStaticGlobal;
+
+#define always_do(pred)                     \
+    do {                                    \
+        if (pred) {                         \
+            ++gScalarBench_NonStaticGlobal; \
+        }                                   \
+    } while (0)
+
+// having unknown values in our arrays can throw off the timing a lot, perhaps
+// handling NaN values is a lot slower. Anyway, this guy is just meant to put
+// reasonable values in our arrays.
+template <typename T> void init9(T array[9]) {  // fills all 9 slots from SkRandom
+    SkRandom rng;
+    for (T* slot = array; slot != array + 9; ++slot) {
+        *slot = rng.nextSScalar1();
+    }
+}
+
+class FloatComparisonBench : public ScalarBench {  // times plain float comparisons
+public:
+    FloatComparisonBench(void* param) : INHERITED(param, "compare_float") {
+        init9(fArray);
+    }
+protected:
+    virtual int mulLoopCount() const { return 4; }  // 4x the base loop count
+    virtual void performTest() {
+        always_do(fArray[6] != 0.0f || fArray[7] != 0.0f || fArray[8] != 1.0f);
+        always_do(fArray[2] != 0.0f || fArray[5] != 0.0f);
+    }
+private:
+    float fArray[9];  // filled by init9() in the constructor
+    typedef ScalarBench INHERITED;
+};
+
+class ForcedIntComparisonBench : public ScalarBench {  // same checks via integer bit patterns
+public:
+    ForcedIntComparisonBench(void* param)
+        : INHERITED(param, "compare_forced_int") {
+        init9(fArray);
+    }
+protected:
+    virtual int mulLoopCount() const { return 4; }  // 4x the base loop count
+    virtual void performTest() {
+        always_do(SkScalarAs2sCompliment(fArray[6]) |
+                  SkScalarAs2sCompliment(fArray[7]) |
+                  (SkScalarAs2sCompliment(fArray[8]) - kPersp1Int));
+        always_do(SkScalarAs2sCompliment(fArray[2]) |
+                  SkScalarAs2sCompliment(fArray[5]));
+    }
+private:
+    static const int32_t kPersp1Int = 0x3f800000;  // IEEE-754 bit pattern of 1.0f
+    SkScalar fArray[9];  // filled by init9() in the constructor
+    typedef ScalarBench INHERITED;
+};
+
+static SkBenchmark* S0(void* p) { return new FloatComparisonBench(p); }  // factory
+static SkBenchmark* S1(void* p) { return new ForcedIntComparisonBench(p); }  // factory
+
+static BenchRegistry gReg0(S0);  // registers the float comparison bench
+static BenchRegistry gReg1(S1);  // registers the forced-int comparison bench
diff --git a/bench/benchmain.cpp b/bench/benchmain.cpp
index 066573a..34f8a1a 100644
--- a/bench/benchmain.cpp
+++ b/bench/benchmain.cpp
@@ -5,12 +5,12 @@
 #include "SkNWayCanvas.h"
 #include "SkPicture.h"
 #include "SkString.h"
-#include "SkTime.h"
 #include "GrContext.h"
 #include "SkGpuDevice.h"
 #include "SkEGLContext.h"
 
 #include "SkBenchmark.h"
+#include "BenchTimer.h"
 
 #ifdef ANDROID
 static void log_error(const char msg[]) { SkDebugf("%s", msg); }
@@ -212,6 +212,9 @@
     bool forceAA = true;
     bool forceFilter = false;
     SkTriState::State forceDither = SkTriState::kDefault;
+    bool timerWall = false;
+    bool timerCpu = true;
+    bool timerGpu = true;
     bool doScale = false;
     bool doRotate = false;
     bool doClip = false;
@@ -246,6 +249,23 @@
                 log_error("missing arg for -repeat\n");
                 return -1;
             }
+        } else if (strcmp(*argv, "-timers") == 0) {
+            argv++;
+            if (argv < stop) {
+                timerWall = false;
+                timerCpu = false;
+                timerGpu = false;
+                for (char* t = *argv; *t; ++t) {
+                    switch (*t) {
+                    case 'w': timerWall = true; break;
+                    case 'c': timerCpu = true; break;
+                    case 'g': timerGpu = true; break;
+                    }
+                }
+            } else {
+                log_error("missing arg for -timers\n");
+                return -1;
+            }
         } else if (!strcmp(*argv, "-rotate")) {
             doRotate = true;
         } else if (!strcmp(*argv, "-scale")) {
@@ -346,6 +366,8 @@
         context = GrContext::CreateGLShaderContext();
     }
     
+    BenchTimer timer;  // direct-init: BenchTimer owns raw pointers and must never be copied
+    
     Iter iter(&defineDict);
     SkBenchmark* bench;
     while ((bench = iter.next()) != NULL) {
@@ -399,30 +421,36 @@
                 performRotate(&canvas, dim.fX, dim.fY);
             }
             
+            bool gpu = kGPU_Backend == backend && context;
             //warm up caches if needed
             if (repeatDraw > 1) {
                 SkAutoCanvasRestore acr(&canvas, true);
                 bench->draw(&canvas);
-                if (kGPU_Backend == backend && context) {
+                if (gpu) {
                     context->flush();
                     glFinish();
                 }
             }
             
-            SkMSec now = SkTime::GetMSecs();
+            timer.start();
             for (int i = 0; i < repeatDraw; i++) {
                 SkAutoCanvasRestore acr(&canvas, true);
                 bench->draw(&canvas);
             }
-            if (kGPU_Backend == backend && context) {
-                context->flush();
-                glFinish();
-            }
+            timer.end();
             
             if (repeatDraw > 1) {
-                double duration = SkTime::GetMSecs() - now;
                 SkString str;
-                str.printf("  %4s: msecs = %5.2f", configName, duration / repeatDraw);
+                str.printf("  %4s:", configName);
+                if (timerWall) {
+                    str.appendf(" msecs = %6.2f", timer.fWall / repeatDraw);
+                }
+                if (timerCpu) {
+                    str.appendf(" cmsecs = %6.2f", timer.fCpu / repeatDraw);
+                }
+                if (timerGpu && gpu && timer.fGpu > 0) {
+                    str.appendf(" gmsecs = %6.2f", timer.fGpu / repeatDraw);
+                }
                 log_progress(str);
             }
             if (outDir.size() > 0) {
diff --git a/gm/gmmain.cpp b/gm/gmmain.cpp
index ea205e3..37c3ee3 100644
--- a/gm/gmmain.cpp
+++ b/gm/gmmain.cpp
@@ -120,7 +120,7 @@
 }
 
 static bool compare(const SkBitmap& target, const SkBitmap& base,
-                    const SkString& name, const char* modeDescriptor,
+                    const SkString& name, const char* renderModeDescriptor,
                     SkBitmap* diff) {
     SkBitmap copy;
     const SkBitmap* bm = &target;
@@ -134,8 +134,9 @@
     const int w = bm->width();
     const int h = bm->height();
     if (w != base.width() || h != base.height()) {
-        SkDebugf("---- %s dimensions mismatch for %s base [%d %d] current [%d %d]\n",
-                 modeDescriptor, name.c_str(),
+        SkDebugf(
+"---- %s dimensions mismatch for %s base [%d %d] current [%d %d]\n",
+                 renderModeDescriptor, name.c_str(),
                  base.width(), base.height(), w, h);
         return false;
     }
@@ -148,8 +149,9 @@
             SkPMColor c0 = *base.getAddr32(x, y);
             SkPMColor c1 = *bm->getAddr32(x, y);
             if (c0 != c1) {
-                SkDebugf("----- %s pixel mismatch for %s at [%d %d] base 0x%08X current 0x%08X\n",
-                         modeDescriptor, name.c_str(), x, y, c0, c1);
+                SkDebugf(
+"----- %s pixel mismatch for %s at [%d %d] base 0x%08X current 0x%08X\n",
+                         renderModeDescriptor, name.c_str(), x, y, c0, c1);
 
                 if (diff) {
                     diff->setConfig(SkBitmap::kARGB_8888_Config, w, h);
@@ -250,17 +252,17 @@
 
 static bool write_reference_image(const ConfigData& gRec,
                                   const char writePath [],
-                                  const char writePathSuffix [],
+                                  const char renderModeDescriptor [],
                                   const SkString& name,
                                   SkBitmap& bitmap,
                                   SkDynamicMemoryWStream* pdf) {
     SkString path;
     bool success = false;
     if (gRec.fBackend != kPDF_Backend) {
-        path = make_filename(writePath, writePathSuffix, name, "png");
+        path = make_filename(writePath, renderModeDescriptor, name, "png");
         success = write_bitmap(path, bitmap);
     } else if (pdf) {
-        path = make_filename(writePath, writePathSuffix, name, "pdf");
+        path = make_filename(writePath, renderModeDescriptor, name, "pdf");
         success = write_pdf(path, *pdf);
     }
     if (!success) {
@@ -273,7 +275,7 @@
                                        const SkString& name,
                                        SkBitmap &bitmap,
                                        const char diffPath [],
-                                       const char modeDescriptor []) {
+                                       const char renderModeDescriptor []) {
     SkString path = make_filename(readPath, "", name, "png");
     SkBitmap orig;
     bool success = SkImageDecoder::DecodeFile(path.c_str(), &orig,
@@ -281,7 +283,7 @@
                         SkImageDecoder::kDecodePixels_Mode, NULL);
     if (success) {
         SkBitmap diffBitmap;
-        success = compare(bitmap, orig, name, modeDescriptor,
+        success = compare(bitmap, orig, name, renderModeDescriptor,
                           diffPath ? &diffBitmap : NULL);
         if (!success && diffPath) {
             SkString diffName = make_filename(diffPath, "", name, ".diff.png");
@@ -299,18 +301,18 @@
                                 const char writePath [],
                                 const char readPath [],
                                 const char diffPath [],
-                                const char writePathSuffix [],
+                                const char renderModeDescriptor [],
                                 SkBitmap& bitmap,
                                 SkDynamicMemoryWStream* pdf) {
     SkString name = make_name(gm->shortName(), gRec.fName);
 
     if (writePath) {
-        write_reference_image(gRec, writePath, writePathSuffix,
+        write_reference_image(gRec, writePath, renderModeDescriptor,
                               name, bitmap, pdf);
     // TODO: Figure out a way to compare PDFs.
     } else if (readPath && gRec.fBackend != kPDF_Backend) {
         return compare_to_reference_image(readPath, name, bitmap,
-                                   diffPath, writePathSuffix);
+                                   diffPath, renderModeDescriptor);
     }
     return true;
 }
@@ -377,7 +379,6 @@
 
 static bool test_picture_playback(GM* gm,
                                   const ConfigData& gRec,
-                                  const char writePath [],
                                   const char readPath [],
                                   const char diffPath []) {
     SkPicture* pict = generate_new_picture(gm);
@@ -386,7 +387,7 @@
     if (kRaster_Backend == gRec.fBackend) {
         SkBitmap bitmap;
         generate_image_from_picture(gm, gRec, pict, &bitmap);
-        return handle_test_results(gm, gRec, writePath, readPath, diffPath,
+        return handle_test_results(gm, gRec, NULL, readPath, diffPath,
                             "-replay", bitmap, NULL);
     }
     return true;
@@ -394,7 +395,6 @@
 
 static bool test_picture_serialization(GM* gm,
                                        const ConfigData& gRec,
-                                       const char writePath [],
                                        const char readPath [],
                                        const char diffPath []) {
     SkPicture* pict = generate_new_picture(gm);
@@ -405,7 +405,7 @@
     if (kRaster_Backend == gRec.fBackend) {
         SkBitmap bitmap;
         generate_image_from_picture(gm, gRec, repict, &bitmap);
-        return handle_test_results(gm, gRec, writePath, readPath, diffPath,
+        return handle_test_results(gm, gRec, NULL, readPath, diffPath,
                             "-serialize", bitmap, NULL);
     }
     return true;
@@ -493,29 +493,31 @@
 
     // Accumulate success of all tests so we can flag error in any
     // one with the return value.
-    bool testSuccess = true;
+    bool overallSuccess = true;
     while ((gm = iter.next()) != NULL) {
         SkISize size = gm->getISize();
         SkDebugf("drawing... %s [%d %d]\n", gm->shortName(),
                  size.width(), size.height());
 
         for (size_t i = 0; i < SK_ARRAY_COUNT(gRec); i++) {
-            testSuccess &= test_drawing(gm, gRec[i],
+            bool testSuccess = test_drawing(gm, gRec[i],
                          writePath, readPath, diffPath, context);
+            overallSuccess &= testSuccess;
 
-            if (doReplay) {
-                testSuccess &= test_picture_playback(gm, gRec[i],
-                                      writePath, readPath, diffPath);
+            if (doReplay && testSuccess) {
+                testSuccess = test_picture_playback(gm, gRec[i],
+                                      readPath, diffPath);
+                overallSuccess &= testSuccess;
             }
 
-            if (doSerialize) {
-                testSuccess &= test_picture_serialization(gm, gRec[i],
-                                           writePath, readPath, diffPath);
+            if (doSerialize && testSuccess) {
+                overallSuccess &= test_picture_serialization(gm, gRec[i],
+                                           readPath, diffPath);
             }
         }
         SkDELETE(gm);
     }
-    if (false == testSuccess) {
+    if (false == overallSuccess) {
         return -1;
     }
     return 0;
diff --git a/gpu/include/GrContext.h b/gpu/include/GrContext.h
index 58c53ba..8809271 100644
--- a/gpu/include/GrContext.h
+++ b/gpu/include/GrContext.h
@@ -66,6 +66,13 @@
     void contextLost();
 
     /**
+     * Similar to contextLost, but makes no attempt to reset state.
+     * Use this method when GrContext destruction is pending, but
+     * the graphics context is destroyed first.
+     */
+    void contextDestroyed();
+
+    /**
      * Frees gpu created by the context. Can be called to reduce GPU memory
      * pressure.
      */
diff --git a/gpu/include/GrGpu.h b/gpu/include/GrGpu.h
index 574a430..5cb885a 100644
--- a/gpu/include/GrGpu.h
+++ b/gpu/include/GrGpu.h
@@ -325,7 +325,7 @@
      * Called to tell Gpu object that all GrResources have been lost and should
      * be abandoned.
      */
-    void abandonResources();
+    virtual void abandonResources();
 
     /**
      * Called to tell Gpu object to release all GrResources.
diff --git a/gpu/src/GrContext.cpp b/gpu/src/GrContext.cpp
index dae1cd1..4ebf225 100644
--- a/gpu/src/GrContext.cpp
+++ b/gpu/src/GrContext.cpp
@@ -57,7 +57,7 @@
 }
 
 GrContext* GrContext::CreateGLShaderContext() {
-    return GrContext::Create(kOpenGL_Shaders_GrEngine, NULL);
+    return GrContext::Create(kOpenGL_Shaders_GrEngine, 0);
 }
 
 GrContext::~GrContext() {
@@ -74,6 +74,11 @@
 }
 
 void GrContext::contextLost() {
+    contextDestroyed();
+    this->setupDrawBuffer();
+}
+
+void GrContext::contextDestroyed() {
     // abandon first to so destructors
     // don't try to free the resources in the API.
     fGpu->abandonResources();
@@ -93,8 +98,6 @@
     fTextureCache->removeAll();
     fFontCache->freeAll();
     fGpu->markContextDirty();
-
-    this->setupDrawBuffer();
 }
 
 void GrContext::resetContext() {
@@ -1192,6 +1195,9 @@
         GrRect pathBounds = path.getBounds();
         GrIRect pathIBounds;
         if (!pathBounds.isEmpty()) {
+            if (NULL != translate) {
+                pathBounds.offset(*translate);
+            }
             target->getViewMatrix().mapRect(&pathBounds, pathBounds);
             pathBounds.roundOut(&pathIBounds);
             if (!bound.intersect(pathIBounds)) {
@@ -1242,8 +1248,10 @@
 
 void GrContext::flushDrawBuffer() {
 #if BATCH_RECT_TO_RECT || DEFER_TEXT_RENDERING
-    fDrawBuffer->playback(fGpu);
-    fDrawBuffer->reset();
+    if (fDrawBuffer) {
+        fDrawBuffer->playback(fGpu);
+        fDrawBuffer->reset();
+    }
 #endif
 }
 
@@ -1302,6 +1310,7 @@
     matrix.setTranslate(GrIntToScalar(left), GrIntToScalar(top));
     fGpu->setViewMatrix(matrix);
 
+    fGpu->setColorFilter(0, SkXfermode::kDst_Mode);
     fGpu->disableState(GrDrawTarget::kClip_StateBit);
     fGpu->setAlpha(0xFF);
     fGpu->setBlendFunc(kOne_BlendCoeff,
diff --git a/gpu/src/GrGpu.cpp b/gpu/src/GrGpu.cpp
index 4fe7ccc..4f260c7 100644
--- a/gpu/src/GrGpu.cpp
+++ b/gpu/src/GrGpu.cpp
@@ -527,7 +527,7 @@
                         }
                     } else {
                         SET_RANDOM_COLOR
-                        this->drawSimpleRect(bounds, 0, NULL);
+                        this->drawSimpleRect(bounds, NULL, 0);
                     }
                 }
             }
diff --git a/gpu/src/GrGpuGLShaders.cpp b/gpu/src/GrGpuGLShaders.cpp
index 50be67f..0a933b5 100644
--- a/gpu/src/GrGpuGLShaders.cpp
+++ b/gpu/src/GrGpuGLShaders.cpp
@@ -130,6 +130,11 @@
     }
 };
 
+void GrGpuGLShaders::abandonResources(){  // override of the GrGpu virtual
+    INHERITED::abandonResources();  // base-class abandonment runs first
+    fProgramCache->abandon();  // NOTE(review): presumably drops cached programs without GL deletes - confirm
+}
+
 void GrGpuGLShaders::DeleteProgram(GrGLProgram::CachedData* programData) {
     GR_GL(DeleteShader(programData->fVShaderID));
     GR_GL(DeleteShader(programData->fFShaderID));
diff --git a/gpu/src/GrGpuGLShaders.h b/gpu/src/GrGpuGLShaders.h
index 9392d1c..557a4e3 100644
--- a/gpu/src/GrGpuGLShaders.h
+++ b/gpu/src/GrGpuGLShaders.h
@@ -31,6 +31,8 @@
 
     virtual void resetContext();
 
+    virtual void abandonResources();
+
 protected:
     // overrides from GrGpu
     virtual bool flushGraphicsState(GrPrimitiveType type);
diff --git a/gpu/src/android/GrGLDefaultInterface_android.cpp b/gpu/src/android/GrGLDefaultInterface_android.cpp
new file mode 100644
index 0000000..e52277e
--- /dev/null
+++ b/gpu/src/android/GrGLDefaultInterface_android.cpp
@@ -0,0 +1,138 @@
+// Modified from chromium/src/webkit/glue/gl_bindings_skia_cmd_buffer.cc
+
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "GrGLInterface.h"
+
+#ifndef GL_GLEXT_PROTOTYPES
+#define GL_GLEXT_PROTOTYPES
+#endif
+
+#include "gl2.h"
+#include "gl2ext.h"
+/*
+#include "gpu/GLES2/gl2.h"
+#include "gpu/GLES2/gl2ext.h"
+*/
+void GrGLSetDefaultGLInterface() {  // registers this table once
+  static GrGLInterface cmd_buffer_interface = {  // positional init: entry order is significant
+    kES2_GrGLBinding,  // NULL slots below are labelled with the entry they skip
+
+    glActiveTexture,
+    glAttachShader,
+    glBindAttribLocation,
+    glBindBuffer,
+    glBindTexture,
+    glBlendColor,
+    glBlendFunc,
+    glBufferData,
+    glBufferSubData,
+    glClear,
+    glClearColor,
+    glClearStencil,
+    NULL,  // glClientActiveTexture
+    NULL,  // glColor4ub
+    glColorMask,
+    NULL,  // glColorPointer
+    glCompileShader,
+    glCompressedTexImage2D,
+    glCreateProgram,
+    glCreateShader,
+    glCullFace,
+    glDeleteBuffers,
+    glDeleteProgram,
+    glDeleteShader,
+    glDeleteTextures,
+    glDepthMask,
+    glDisable,
+    NULL,  // glDisableClientState
+    glDisableVertexAttribArray,
+    glDrawArrays,
+    glDrawElements,
+    glEnable,
+    NULL,  // glEnableClientState
+    glEnableVertexAttribArray,
+    glFrontFace,
+    glGenBuffers,
+    glGenTextures,
+    glGetBufferParameteriv,
+    glGetError,
+    glGetIntegerv,
+    glGetProgramInfoLog,
+    glGetProgramiv,
+    glGetShaderInfoLog,
+    glGetShaderiv,
+    glGetString,
+    glGetUniformLocation,
+    glLineWidth,
+    glLinkProgram,
+    NULL,  // glLoadMatrixf
+    NULL,  // glMatrixMode
+    glPixelStorei,
+    NULL,  // glPointSize
+    glReadPixels,
+    glScissor,
+    NULL,  // glShadeModel
+    glShaderSource,
+    glStencilFunc,
+    glStencilFuncSeparate,
+    glStencilMask,
+    glStencilMaskSeparate,
+    glStencilOp,
+    glStencilOpSeparate,
+    NULL,  // glTexCoordPointer
+    NULL,  // glTexEnvi
+    glTexImage2D,
+    glTexParameteri,
+    glTexSubImage2D,
+    glUniform1f,
+    glUniform1i,
+    glUniform1fv,
+    glUniform1iv,
+    glUniform2f,
+    glUniform2i,
+    glUniform2fv,
+    glUniform2iv,
+    glUniform3f,
+    glUniform3i,
+    glUniform3fv,
+    glUniform3iv,
+    glUniform4f,
+    glUniform4i,
+    glUniform4fv,
+    glUniform4iv,
+    glUniformMatrix2fv,
+    glUniformMatrix3fv,
+    glUniformMatrix4fv,
+    glUseProgram,
+    glVertexAttrib4fv,
+    glVertexAttribPointer,
+    NULL,  // glVertexPointer
+    glViewport,
+    glBindFramebuffer,  // framebuffer/renderbuffer entries start here
+    glBindRenderbuffer,
+    glCheckFramebufferStatus,
+    glDeleteFramebuffers,
+    glDeleteRenderbuffers,
+    glFramebufferRenderbuffer,
+    glFramebufferTexture2D,
+    glGenFramebuffers,
+    glGenRenderbuffers,
+    glRenderbufferStorage,
+    NULL,  // glRenderbufferStorageMultisampleEXT,
+    NULL,  // glBlitFramebufferEXT,
+    NULL,  // glResolveMultisampleFramebuffer
+    glMapBufferOES,  // OES-suffixed map/unmap symbols fill these two slots
+    glUnmapBufferOES,
+    NULL,  // NOTE(review): unlabelled slot - confirm which GrGLInterface member this fills
+    GrGLInterface::kStaticInitEndGuard  // presumably an end-of-initializer sentinel - confirm
+  };
+  static bool host_StubGL_initialized = false;  // flips to true after the first registration
+  if (!host_StubGL_initialized) {  // later calls fall through without re-registering
+    GrGLSetGLInterface(&cmd_buffer_interface);  // hands over the address of the static table
+    host_StubGL_initialized = true;
+  }
+}
+
diff --git a/gpu/src/app-android.cpp b/gpu/src/app-android.cpp
index eea9a4d..e07aa1d 100644
--- a/gpu/src/app-android.cpp
+++ b/gpu/src/app-android.cpp
@@ -14,8 +14,8 @@
 
 static GrContext* make_context() {
     SkDebugf("---- before create\n");
-    GrContext* ctx = GrContext::Create(GrGpu::kOpenGL_Shaders_Engine, NULL);
-//    GrContext* ctx = GrContext::Create(GrGpu::kOpenGL_Fixed_Engine, NULL);
+    GrContext* ctx = GrContext::Create(GrGpu::kOpenGL_Shaders_Engine, 0);
+//    GrContext* ctx = GrContext::Create(GrGpu::kOpenGL_Fixed_Engine, 0);
     SkDebugf("---- after create %p\n", ctx);
     return ctx;
 }
diff --git a/gpu/src/skia/SkUIView.mm b/gpu/src/skia/SkUIView.mm
index 667a474..8cd6c77 100644
--- a/gpu/src/skia/SkUIView.mm
+++ b/gpu/src/skia/SkUIView.mm
@@ -362,9 +362,9 @@
     // should be pthread-local at least
     if (NULL == gCtx) {        
 #ifdef USE_GL_1
-        gCtx = GrContext::Create(GrGpu::kOpenGL_Fixed_Engine, NULL);
+        gCtx = GrContext::Create(GrGpu::kOpenGL_Fixed_Engine, 0);
 #else
-        gCtx = GrContext::Create(GrGpu::kOpenGL_Shaders_Engine, NULL);
+        gCtx = GrContext::Create(GrGpu::kOpenGL_Shaders_Engine, 0);
 #endif
     }
     return gCtx;
diff --git a/include/core/SkDrawFilter.h b/include/core/SkDrawFilter.h
index 1434391..c8af187 100644
--- a/include/core/SkDrawFilter.h
+++ b/include/core/SkDrawFilter.h
@@ -44,7 +44,7 @@
      *  Called with the paint that will be used to draw the specified type.
      *  The implementation may modify the paint as they wish.
      */
-    virtual void filter(SkPaint*, Type) {}
+    virtual void filter(SkPaint*, Type) = 0;
 };
 
 #endif
diff --git a/include/core/SkFixed.h b/include/core/SkFixed.h
index c58c6de..8b56c50 100644
--- a/include/core/SkFixed.h
+++ b/include/core/SkFixed.h
@@ -111,6 +111,9 @@
 #define SkFixedAbs(x)       SkAbs32(x)
 #define SkFixedAve(a, b)    (((a) + (b)) >> 1)
 
+// The same as SkIntToFixed(SkFixedFloor(x))
+#define SkFixedFloorToFixed(x)  ((x) & ~0xFFFF)
+
 SkFixed SkFixedMul_portable(SkFixed, SkFixed);
 SkFract SkFractMul_portable(SkFract, SkFract);
 inline SkFixed SkFixedSquare_portable(SkFixed value)
diff --git a/include/core/SkFontHost.h b/include/core/SkFontHost.h
index d0f7c65..e20ea05 100644
--- a/include/core/SkFontHost.h
+++ b/include/core/SkFontHost.h
@@ -150,14 +150,22 @@
     */
     static SkScalerContext* CreateScalerContext(const SkDescriptor* desc);
 
-    /** Given a "current" fontID, return the next logical fontID to use
-        when searching fonts for a given unicode value. Typically the caller
-        will query a given font, and if a unicode value is not supported, they
-        will call this, and if 0 is not returned, will search that font, and so
-        on. This process must be finite, and when the fonthost sees a
-        font with no logical successor, it must return 0.
-    */
-    static uint32_t NextLogicalFont(SkFontID fontID);
+    /**
+     *  Given a "current" fontID, return the next logical fontID to use
+     *  when searching fonts for a given unicode value. Typically the caller
+     *  will query a given font, and if a unicode value is not supported, they
+     *  will call this, and if 0 is not returned, will search that font, and so
+     *  on. This process must be finite, and when the fonthost sees a
+     *  font with no logical successor, it must return 0.
+     *
+     *  The original fontID is also provided. This is the initial font that was
+     *  stored in the typeface of the caller. It is provided as an aid to choose
+     *  the best next logical font. e.g. If the original font was bold or serif,
+     *  but the 2nd in the logical chain was plain, then a subsequent call to
+     *  get the 3rd can still inspect the original, and try to match its
+     *  stylistic attributes.
+     */
+    static SkFontID NextLogicalFont(SkFontID currFontID, SkFontID origFontID);
 
     ///////////////////////////////////////////////////////////////////////////
 
diff --git a/include/core/SkScalar.h b/include/core/SkScalar.h
index 5dbf684..ba113f4 100644
--- a/include/core/SkScalar.h
+++ b/include/core/SkScalar.h
@@ -18,6 +18,7 @@
 #define SkScalar_DEFINED
 
 #include "SkFixed.h"
+#include "SkFloatingPoint.h"
 
 /** \file SkScalar.h
 
@@ -29,7 +30,6 @@
 */
 
 #ifdef SK_SCALAR_IS_FLOAT
-    #include "SkFloatingPoint.h"
 
     /** SkScalar is our type for fractional values and coordinates. Depending on
         compile configurations, it is either represented as an IEEE float, or
diff --git a/include/core/SkScalerContext.h b/include/core/SkScalerContext.h
index 3f818a3..cbbbdf0 100644
--- a/include/core/SkScalerContext.h
+++ b/include/core/SkScalerContext.h
@@ -189,6 +189,7 @@
     };
 public:
     struct Rec {
+        uint32_t    fOrigFontID;
         uint32_t    fFontID;
         SkScalar    fTextSize, fPreScaleX, fPreSkewX;
         SkScalar    fPost2x2[2][2];
diff --git a/include/effects/SkLayerDrawLooper.h b/include/effects/SkLayerDrawLooper.h
index acc4f9b..8627ae4 100644
--- a/include/effects/SkLayerDrawLooper.h
+++ b/include/effects/SkLayerDrawLooper.h
@@ -74,12 +74,12 @@
     /**
      *  This layer will draw with the original paint, ad the specified offset
      */
-    SkPaint* addLayer(SkScalar dx, SkScalar dy);
+    void addLayer(SkScalar dx, SkScalar dy);
     
     /**
      *  This layer will with the original paint and no offset.
      */
-    SkPaint* addLayer() { return this->addLayer(0, 0); }
+    void addLayer() { this->addLayer(0, 0); }
     
     // overrides from SkDrawLooper
     virtual void init(SkCanvas*);
diff --git a/include/views/SkOSWindow_Unix.h b/include/views/SkOSWindow_Unix.h
index 803ca13..45b53d5 100644
--- a/include/views/SkOSWindow_Unix.h
+++ b/include/views/SkOSWindow_Unix.h
@@ -63,13 +63,11 @@
 private:
     SkUnixWindow  fUnixWindow;
     bool fGLAttached;
-    bool fRestart;
 
     // Needed for GL
     XVisualInfo* fVi;
 
     void    doPaint();
-    void    restartLoop();
     void    mapWindowAndWait();
 
     typedef SkWindow INHERITED;
diff --git a/samplecode/SampleApp.cpp b/samplecode/SampleApp.cpp
index 3efffe6..f05fe6e 100644
--- a/samplecode/SampleApp.cpp
+++ b/samplecode/SampleApp.cpp
@@ -848,7 +848,7 @@
             break;
 #ifdef SK_SUPPORT_GL
         case kGPU_CanvasType:
-            if (fShowZoomer) {
+            if (fShowZoomer && fGpuCanvas) {
                 this->showZoomer(fGpuCanvas);
             }
             delete fGpuCanvas;
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index dbd1bfa..dec355a 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp
@@ -295,14 +295,10 @@
     int height = clip.height();
 
 #if defined(SK_SUPPORT_LCDTEXT)
-    const bool      lcdMode = mask.fFormat == SkMask::kHorizontalLCD_Format;
-    const bool      verticalLCDMode = mask.fFormat == SkMask::kVerticalLCD_Format;
-#endif
+    const bool lcdMode = mask.fFormat == SkMask::kHorizontalLCD_Format;
+    const bool verticalLCDMode = mask.fFormat == SkMask::kVerticalLCD_Format;
 
     // In LCD mode the masks have either an extra couple of rows or columns on the edges.
-    SkPMColor srcColor = fPMColor;
-
-#if defined(SK_SUPPORT_LCDTEXT)
     if (lcdMode || verticalLCDMode) {
         int widthAdjustment, heightAdjustment;
         const uint32_t* alpha32;
@@ -313,6 +309,7 @@
 
         unsigned devRB = fDevice.rowBytes() - (width << 2);
         unsigned alphaExtraRowWords = mask.rowWordsLCD() - width;
+        SkPMColor srcColor = fPMColor;
 
         do {
             unsigned w = width;
diff --git a/src/core/SkCanvas.cpp b/src/core/SkCanvas.cpp
index e2d70ee..7d29f74 100644
--- a/src/core/SkCanvas.cpp
+++ b/src/core/SkCanvas.cpp
@@ -1064,7 +1064,7 @@
         return true;
     }
 
-    if (fMCRec->fMatrix->getType() & SkMatrix::kPerspective_Mask) {
+    if (fMCRec->fMatrix->hasPerspective()) {
         SkRect dst;
         fMCRec->fMatrix->mapRect(&dst, rect);
         SkIRect idst;
@@ -1666,4 +1666,3 @@
 const SkRegion& SkCanvas::LayerIter::clip() const { return fImpl->getClip(); }
 int SkCanvas::LayerIter::x() const { return fImpl->getX(); }
 int SkCanvas::LayerIter::y() const { return fImpl->getY(); }
-
diff --git a/src/core/SkDraw.cpp b/src/core/SkDraw.cpp
index 14e5a80..74a30e9 100644
--- a/src/core/SkDraw.cpp
+++ b/src/core/SkDraw.cpp
@@ -864,7 +864,7 @@
 // for that we'll transform (0,1) and (1,0), and check that the resulting dot-prod
 // is nearly one
 static bool map_radius(const SkMatrix& matrix, SkScalar* value) {
-    if (matrix.getType() & SkMatrix::kPerspective_Mask) {
+    if (matrix.hasPerspective()) {
         return false;
     }
     SkVector src[2], dst[2];
@@ -1562,7 +1562,7 @@
     }
 
     if (/*paint.isLinearText() ||*/
-        (fMatrix->getType() & SkMatrix::kPerspective_Mask)) {
+        (fMatrix->hasPerspective())) {
         this->drawText_asPaths(text, byteLength, x, y, paint);
         handle_aftertext(this, paint, underlineWidth, underlineStart);
         return;
@@ -1760,7 +1760,7 @@
     }
 
     if (/*paint.isLinearText() ||*/
-        (fMatrix->getType() & SkMatrix::kPerspective_Mask)) {
+        (fMatrix->hasPerspective())) {
         // TODO !!!!
 //      this->drawText_asPaths(text, byteLength, x, y, paint);
         return;
diff --git a/src/core/SkMatrix.cpp b/src/core/SkMatrix.cpp
index 06bca1e..da66a68 100644
--- a/src/core/SkMatrix.cpp
+++ b/src/core/SkMatrix.cpp
@@ -46,10 +46,6 @@
     this->setTypeMask(kIdentity_Mask | kRectStaysRect_Mask);
 }
 
-static inline int has_perspective(const SkMatrix& matrix) {
-    return matrix.getType() & SkMatrix::kPerspective_Mask;
-}
-
 // this guy aligns with the masks, so we can compute a mask from a varaible 0/1
 enum {
     kTranslate_Shift,
@@ -71,22 +67,36 @@
 uint8_t SkMatrix::computeTypeMask() const {
     unsigned mask = 0;
 
+#ifdef SK_SCALAR_SLOW_COMPARES
     if (SkScalarAs2sCompliment(fMat[kMPersp0]) |
             SkScalarAs2sCompliment(fMat[kMPersp1]) |
             (SkScalarAs2sCompliment(fMat[kMPersp2]) - kPersp1Int)) {
         mask |= kPerspective_Mask;
     }
-    
+
     if (SkScalarAs2sCompliment(fMat[kMTransX]) |
             SkScalarAs2sCompliment(fMat[kMTransY])) {
         mask |= kTranslate_Mask;
     }
+#else
+    // Benchmarking suggests that replacing this set of SkScalarAs2sCompliment
+    // is a win, but replacing those below is not. We don't yet understand
+    // that result.
+    if (fMat[kMPersp0] != 0 || fMat[kMPersp1] != 0 ||
+        fMat[kMPersp2] != kMatrix22Elem) {
+        mask |= kPerspective_Mask;
+    }
+
+    if (fMat[kMTransX] != 0 || fMat[kMTransY] != 0) {
+        mask |= kTranslate_Mask;
+    }
+#endif
 
     int m00 = SkScalarAs2sCompliment(fMat[SkMatrix::kMScaleX]);
     int m01 = SkScalarAs2sCompliment(fMat[SkMatrix::kMSkewX]);
     int m10 = SkScalarAs2sCompliment(fMat[SkMatrix::kMSkewY]);
     int m11 = SkScalarAs2sCompliment(fMat[SkMatrix::kMScaleY]);
-    
+
     if (m01 | m10) {
         mask |= kAffine_Mask;
     }
@@ -94,21 +104,21 @@
     if ((m00 - kScalar1Int) | (m11 - kScalar1Int)) {
         mask |= kScale_Mask;
     }
-    
+
     if ((mask & kPerspective_Mask) == 0) {
         // map non-zero to 1
         m00 = m00 != 0;
         m01 = m01 != 0;
         m10 = m10 != 0;
         m11 = m11 != 0;
-        
+
         // record if the (p)rimary and (s)econdary diagonals are all 0 or
         // all non-zero (answer is 0 or 1)
         int dp0 = (m00 | m11) ^ 1;  // true if both are 0
         int dp1 = m00 & m11;        // true if both are 1
         int ds0 = (m01 | m10) ^ 1;  // true if both are 0
         int ds1 = m01 & m10;        // true if both are 1
-        
+
         // return 1 if primary is 1 and secondary is 0 or
         // primary is 0 and secondary is 1
         mask |= ((dp0 & ds1) | (dp1 & ds0)) << kRectStaysRect_Shift;
@@ -151,7 +161,7 @@
 }
 
 bool SkMatrix::preTranslate(SkScalar dx, SkScalar dy) {
-    if (has_perspective(*this)) {
+    if (this->hasPerspective()) {
         SkMatrix    m;
         m.setTranslate(dx, dy);
         return this->preConcat(m);
@@ -169,7 +179,7 @@
 }
 
 bool SkMatrix::postTranslate(SkScalar dx, SkScalar dy) {
-    if (has_perspective(*this)) {
+    if (this->hasPerspective()) {
         SkMatrix    m;
         m.setTranslate(dx, dy);
         return this->postConcat(m);
@@ -754,7 +764,7 @@
     SkMatrix identity;
     const SkMatrix* use = this;
     bool ret = true;
-    if (has_perspective(*this)) {
+    if (this->hasPerspective()) {
         identity.reset();
         use = &identity;
         ret = false;
@@ -769,7 +779,7 @@
 }
 
 bool SkMatrix::invert(SkMatrix* inv) const {
-    int         isPersp = has_perspective(*this);
+    int         isPersp = this->hasPerspective();
     int         shift;
     SkDetScalar scale = sk_inv_determinant(fMat, isPersp, &shift);
 
@@ -951,7 +961,7 @@
 
 void SkMatrix::RotTrans_pts(const SkMatrix& m, SkPoint dst[],
                             const SkPoint src[], int count) {
-    SkASSERT((m.getType() & kPerspective_Mask) == 0);
+    SkASSERT(!m.hasPerspective());
 
     if (count > 0) {
         SkScalar mx = m.fMat[kMScaleX];
@@ -973,7 +983,7 @@
 
 void SkMatrix::Persp_pts(const SkMatrix& m, SkPoint dst[],
                          const SkPoint src[], int count) {
-    SkASSERT(m.getType() & kPerspective_Mask);
+    SkASSERT(m.hasPerspective());
 
 #ifdef SK_SCALAR_IS_FIXED
     SkFixed persp2 = SkFractToFixed(m.fMat[kMPersp2]);
@@ -1030,7 +1040,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkMatrix::mapVectors(SkPoint dst[], const SkPoint src[], int count) const {
-    if (this->getType() & kPerspective_Mask) {
+    if (this->hasPerspective()) {
         SkPoint origin;
 
         MapXYProc proc = this->getMapXYProc();
@@ -1085,7 +1095,7 @@
 
 void SkMatrix::Persp_xy(const SkMatrix& m, SkScalar sx, SkScalar sy,
                         SkPoint* pt) {
-    SkASSERT(m.getType() & kPerspective_Mask);
+    SkASSERT(m.hasPerspective());
 
     SkScalar x = SkScalarMul(sx, m.fMat[kMScaleX]) +
                  SkScalarMul(sy, m.fMat[kMSkewX]) + m.fMat[kMTransX];
@@ -1747,4 +1757,3 @@
                 fMat[6], fMat[7], fMat[8]);
 #endif
 }
-
diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp
index a607424..c29c4f0 100644
--- a/src/core/SkPaint.cpp
+++ b/src/core/SkPaint.cpp
@@ -1242,10 +1242,10 @@
 
 void SkScalerContext::MakeRec(const SkPaint& paint,
                               const SkMatrix* deviceMatrix, Rec* rec) {
-    SkASSERT(deviceMatrix == NULL ||
-             (deviceMatrix->getType() & SkMatrix::kPerspective_Mask) == 0);
+    SkASSERT(deviceMatrix == NULL || !deviceMatrix->hasPerspective());
 
-    rec->fFontID = SkTypeface::UniqueID(paint.getTypeface());
+    rec->fOrigFontID = SkTypeface::UniqueID(paint.getTypeface());
+    rec->fFontID = rec->fOrigFontID;
     rec->fTextSize = paint.getTextSize();
     rec->fPreScaleX = paint.getTextScaleX();
     rec->fPreSkewX  = paint.getTextSkewX();
diff --git a/src/core/SkPath.cpp b/src/core/SkPath.cpp
index ca57237..b88b20f 100644
--- a/src/core/SkPath.cpp
+++ b/src/core/SkPath.cpp
@@ -949,7 +949,7 @@
         dst = (SkPath*)this;
     }
 
-    if (matrix.getType() & SkMatrix::kPerspective_Mask) {
+    if (matrix.hasPerspective()) {
         SkPath  tmp;
         tmp.fFillType = fFillType;
 
@@ -1532,4 +1532,3 @@
     }
     return state.getConvexity();
 }
-
diff --git a/src/core/SkScalerContext.cpp b/src/core/SkScalerContext.cpp
index 6b5f663..05439f1 100644
--- a/src/core/SkScalerContext.cpp
+++ b/src/core/SkScalerContext.cpp
@@ -180,7 +180,7 @@
     // fonthost will determine the next possible font to search, based
     // on the current font in fRec. It will return NULL if ctx is our
     // last font that can be searched (i.e. ultimate fallback font)
-    uint32_t newFontID = SkFontHost::NextLogicalFont(rec.fFontID);
+    uint32_t newFontID = SkFontHost::NextLogicalFont(rec.fFontID, rec.fOrigFontID);
     if (0 == newFontID) {
         return NULL;
     }
diff --git a/src/core/SkScan_AntiPath.cpp b/src/core/SkScan_AntiPath.cpp
index 398f786..4dc2cd3 100644
--- a/src/core/SkScan_AntiPath.cpp
+++ b/src/core/SkScan_AntiPath.cpp
@@ -26,8 +26,20 @@
 #define SCALE   (1 << SHIFT)
 #define MASK    (SCALE - 1)
 
+/*
+    We have two techniques for capturing the output of the supersampler:
+    - SUPERMASK, which records a large mask-bitmap
+        this is often faster for small, complex objects
+    - RLE, which records a rle-encoded scanline
+        this is often faster for large objects with big spans
+
+    NEW_AA is a set of code-changes to try to make both paths produce identical
+    results. It's not quite there yet, though the remaining differences may be
+    in the subsequent blits, and not in the different masks/runs...
+ */
 //#define FORCE_SUPERMASK
 //#define FORCE_RLE
+//#define SK_SUPPORT_NEW_AA
 
 ///////////////////////////////////////////////////////////////////////////////
 
@@ -281,9 +293,17 @@
         edge of the current span round to the same super-sampled x value,
         I might overflow to 256 with this add, hence the funny subtract.
     */
+#ifdef SK_SUPPORT_NEW_AA
+    if (startAlpha) {
+        unsigned tmp = *alpha + startAlpha;
+        SkASSERT(tmp <= 256);
+        *alpha++ = SkToU8(tmp - (tmp >> 8));
+    }
+#else
     unsigned tmp = *alpha + startAlpha;
     SkASSERT(tmp <= 256);
     *alpha++ = SkToU8(tmp - (tmp >> 8));
+#endif
 
     if (middleCount >= MIN_COUNT_FOR_QUAD_LOOP) {
         // loop until we're quad-byte aligned
@@ -365,7 +385,15 @@
         SkASSERT(row < fMask.fImage + kMAX_STORAGE + 1);
         add_aa_span(row, coverage_to_alpha(fe - fb));
     } else {
+#ifdef SK_SUPPORT_NEW_AA
+        if (0 == fb) {
+            n += 1;
+        } else {
+            fb = (1 << SHIFT) - fb;
+        }
+#else
         fb = (1 << SHIFT) - fb;
+#endif
         SkASSERT(row >= fMask.fImage);
         SkASSERT(row + n + 1 < fMask.fImage + kMAX_STORAGE + 1);
         add_aa_span(row,  coverage_to_alpha(fb), n, coverage_to_alpha(fe),
diff --git a/src/core/SkShader.cpp b/src/core/SkShader.cpp
index 7b46953..b51705e 100644
--- a/src/core/SkShader.cpp
+++ b/src/core/SkShader.cpp
@@ -185,7 +185,7 @@
 SkShader::MatrixClass SkShader::ComputeMatrixClass(const SkMatrix& mat) {
     MatrixClass mc = kLinear_MatrixClass;
 
-    if (mat.getType() & SkMatrix::kPerspective_Mask) {
+    if (mat.hasPerspective()) {
         if (mat.fixedStepInX(0, NULL, NULL)) {
             mc = kFixedStepInX_MatrixClass;
         } else {
diff --git a/src/effects/SkLayerDrawLooper.cpp b/src/effects/SkLayerDrawLooper.cpp
index 34c3238..5d0fdcf 100644
--- a/src/effects/SkLayerDrawLooper.cpp
+++ b/src/effects/SkLayerDrawLooper.cpp
@@ -37,11 +37,11 @@
     return &rec->fPaint;
 }
 
-SkPaint* SkLayerDrawLooper::addLayer(SkScalar dx, SkScalar dy) {
+void SkLayerDrawLooper::addLayer(SkScalar dx, SkScalar dy) {
     LayerInfo info;
 
     info.fOffset.set(dx, dy);
-    return this->addLayer(info);
+    (void)this->addLayer(info);
 }
 
 void SkLayerDrawLooper::init(SkCanvas* canvas) {
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 7166a07..ae8ab6c 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -1211,16 +1211,23 @@
     }
 
     /*
-     *  Skia calls us with fx,fy already biased by 1/2. It does this to speed
-     *  up rounding these, so that all of its procs (like us) can just call
-     *  SkFixedFloor and get the "rounded" value.
+     *  What should we do with fy? (assuming horizontal/latin text)
      *
-     *  We take advantage of that for fx, where we pass a rounded value, but
-     *  we want the fractional fy, so we have to unbias it first.
+     *  The raster code calls SkFixedFloorToFixed on it, as it does with fx.
+     *  It calls that rather than round, because our caller has already added
+     *  SK_FixedHalf, so that calling floor gives us the rounded integer.
+     *
+     *  Test code between raster and gpu (they should draw the same)
+     *
+     *      canvas->drawText("Hamburgefons", 12, 0, 16.5f, paint);
+     *
+     *  Perhaps we should only perform this integralization if there is no
+     *  fExtMatrix...
      */
+    fy = SkFixedFloorToFixed(fy);
+
     procs->fTextContext->drawPackedGlyph(GrGlyph::Pack(glyph.getGlyphID(), fx, 0),
-                                         SkIntToFixed(SkFixedFloor(fx)),
-                                         fy - SK_FixedHalf,
+                                         SkFixedFloorToFixed(fx), fy,
                                          procs->fFontScaler);
 }
 
diff --git a/src/ports/SkFontHost_android.cpp b/src/ports/SkFontHost_android.cpp
index d01577d..e9b3986 100644
--- a/src/ports/SkFontHost_android.cpp
+++ b/src/ports/SkFontHost_android.cpp
@@ -632,7 +632,7 @@
     }
 }
 
-uint32_t SkFontHost::NextLogicalFont(uint32_t fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     load_system_fonts();
 
     /*  First see if fontID is already one of our fallbacks. If so, return
@@ -642,7 +642,7 @@
      */
     const uint32_t* list = gFallbackFonts;
     for (int i = 0; list[i] != 0; i++) {
-        if (list[i] == fontID) {
+        if (list[i] == currFontID) {
             return list[i+1];
         }
     }
diff --git a/src/ports/SkFontHost_fontconfig.cpp b/src/ports/SkFontHost_fontconfig.cpp
index 21fc773..332c911 100644
--- a/src/ports/SkFontHost_fontconfig.cpp
+++ b/src/ports/SkFontHost_fontconfig.cpp
@@ -364,8 +364,7 @@
     return NULL;
 }
 
-// static
-uint32_t SkFontHost::NextLogicalFont(SkFontID fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     // We don't handle font fallback, WebKit does.
     return 0;
 }
diff --git a/src/ports/SkFontHost_linux.cpp b/src/ports/SkFontHost_linux.cpp
index 37c2c35..9ede78c 100644
--- a/src/ports/SkFontHost_linux.cpp
+++ b/src/ports/SkFontHost_linux.cpp
@@ -578,7 +578,7 @@
     return 0;
 }
 
-uint32_t SkFontHost::NextLogicalFont(uint32_t fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     return 0;
 }
 
diff --git a/src/ports/SkFontHost_mac_atsui.cpp b/src/ports/SkFontHost_mac_atsui.cpp
index 5bc438a..fb61c60 100644
--- a/src/ports/SkFontHost_mac_atsui.cpp
+++ b/src/ports/SkFontHost_mac_atsui.cpp
@@ -491,9 +491,9 @@
     return new SkScalerContext_Mac(desc);
 }
 
-uint32_t SkFontHost::NextLogicalFont(uint32_t fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     uint32_t newFontID = find_default_fontID();
-    if (newFontID == fontID) {
+    if (newFontID == currFontID) {
         newFontID = 0;
     }
     return newFontID;
diff --git a/src/ports/SkFontHost_mac_coretext.cpp b/src/ports/SkFontHost_mac_coretext.cpp
index ad57e16..f9eba95 100644
--- a/src/ports/SkFontHost_mac_coretext.cpp
+++ b/src/ports/SkFontHost_mac_coretext.cpp
@@ -887,10 +887,10 @@
     return new SkScalerContext_Mac(desc);
 }
 
-SkFontID SkFontHost::NextLogicalFont(SkFontID fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     SkFontID nextFontID = 0;
     SkTypeface* face = GetDefaultFace();
-    if (face->uniqueID() != fontID) {
+    if (face->uniqueID() != currFontID) {
         nextFontID = face->uniqueID();
     }
     return nextFontID;
diff --git a/src/ports/SkFontHost_none.cpp b/src/ports/SkFontHost_none.cpp
index 91546f8..0593dd5 100644
--- a/src/ports/SkFontHost_none.cpp
+++ b/src/ports/SkFontHost_none.cpp
@@ -80,7 +80,7 @@
     return NULL;
 }
 
-uint32_t SkFontHost::NextLogicalFont(uint32_t fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     return 0;
 }
 
diff --git a/src/ports/SkFontHost_simple.cpp b/src/ports/SkFontHost_simple.cpp
index 54d326e..d63aec2 100644
--- a/src/ports/SkFontHost_simple.cpp
+++ b/src/ports/SkFontHost_simple.cpp
@@ -620,7 +620,7 @@
     }
 }
 
-uint32_t SkFontHost::NextLogicalFont(uint32_t fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
     load_system_fonts();
 
     /*  First see if fontID is already one of our fallbacks. If so, return
@@ -630,7 +630,7 @@
      */
     const uint32_t* list = gFallbackFonts;
     for (int i = 0; list[i] != 0; i++) {
-        if (list[i] == fontID) {
+        if (list[i] == currFontID) {
             return list[i+1];
         }
     }
diff --git a/src/ports/SkFontHost_win.cpp b/src/ports/SkFontHost_win.cpp
index f5d126e..62b0a0c 100755
--- a/src/ports/SkFontHost_win.cpp
+++ b/src/ports/SkFontHost_win.cpp
@@ -179,7 +179,7 @@
     return face;
 }
 
-uint32_t SkFontHost::NextLogicalFont(uint32_t fontID) {
+SkFontID SkFontHost::NextLogicalFont(SkFontID currFontID, SkFontID origFontID) {
   // Zero means that we don't have any fallback fonts for this fontID.
   // This function is implemented on Android, but doesn't have much
   // meaning here.
diff --git a/src/utils/unix/SkOSWindow_Unix.cpp b/src/utils/unix/SkOSWindow_Unix.cpp
index ae881d5..652a1ae 100644
--- a/src/utils/unix/SkOSWindow_Unix.cpp
+++ b/src/utils/unix/SkOSWindow_Unix.cpp
@@ -18,8 +18,8 @@
     #include "keysym2ucs.h"
 }
 
-const int WIDTH = 1000;
-const int HEIGHT = 1000;
+const int WIDTH = 500;
+const int HEIGHT = 500;
 
 // Determine which events to listen for.
 const long EVENT_MASK = StructureNotifyMask|ButtonPressMask|ButtonReleaseMask
@@ -33,13 +33,16 @@
         // Attempt to create a window that supports GL
         GLint att[] = { GLX_RGBA, GLX_DEPTH_SIZE, 24, GLX_DOUBLEBUFFER,
                 GLX_STENCIL_SIZE, 8, None };
-        fVi = glXChooseVisual(dsp, 0, att);
+        fVi = glXChooseVisual(dsp, DefaultScreen(dsp), att);
         if (fVi) {
+            Colormap colorMap = XCreateColormap(dsp, RootWindow(dsp, fVi->screen),
+                fVi->visual, AllocNone);
             XSetWindowAttributes swa;
+            swa.colormap = colorMap;
             swa.event_mask = EVENT_MASK;
-            fUnixWindow.fWin = XCreateWindow(dsp, DefaultRootWindow(dsp),
+            fUnixWindow.fWin = XCreateWindow(dsp, RootWindow(dsp, fVi->screen),
                     0, 0, WIDTH, HEIGHT, 0, fVi->depth,
-                    InputOutput, fVi->visual, CWEventMask, &swa);
+                    InputOutput, fVi->visual, CWEventMask | CWColormap, &swa);
 
         } else {
             // Create a simple window instead.  We will not be able to
@@ -51,7 +54,6 @@
         fUnixWindow.fGc = XCreateGC(dsp, fUnixWindow.fWin, 0, NULL);
     }
     this->resize(WIDTH, HEIGHT);
-    fRestart = false;
     fUnixWindow.fGLCreated = false;
 }
 
@@ -84,14 +86,6 @@
                (XEvent*) &event);
 }
 
-void SkOSWindow::restartLoop()
-{
-    // We have a new window, so we need to set the title again and restart the
-    // loop.
-    this->onSetTitle(this->getTitle());
-    fRestart = true;
-}
-
 void SkOSWindow::loop()
 {
     Display* dsp = fUnixWindow.fDisplay;
@@ -100,11 +94,6 @@
     bool loop = true;
     XEvent evt;
     while (loop) {
-        if (fRestart) {
-            fRestart = false;
-            this->loop();
-            return;
-        }
         XNextEvent(dsp, &evt);
         switch (evt.type) {
             case Expose:
@@ -194,8 +183,6 @@
         glXMakeCurrent(dsp, fUnixWindow.fWin, fUnixWindow.fGLContext);
     fGLAttached = true;
 
-
-    this->restartLoop();
     return true;
 }
 
@@ -205,7 +192,6 @@
     fGLAttached = false;
     // Returns back to normal drawing.
     glXMakeCurrent(fUnixWindow.fDisplay, None, NULL);
-    this->restartLoop();
     // Ensure that we redraw when switching back to raster.
     this->inval(NULL);
 }
diff --git a/tests/Android.mk b/tests/Android.mk
index 5823b0e..4db6c75 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -24,6 +24,7 @@
   PackBitsTest.cpp \
   PaintTest.cpp \
   ParsePathTest.cpp \
+  PathCoverageTest.cpp \
   PathMeasureTest.cpp \
   PathTest.cpp \
   Reader32Test.cpp \
diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp
index 7a9364f..efdad3a 100644
--- a/tests/MathTest.cpp
+++ b/tests/MathTest.cpp
@@ -188,7 +188,7 @@
 
 static void unittest_isfinite(skiatest::Reporter* reporter) {
 #ifdef SK_SCALAR_IS_FLOAT
-    float nan = ::asin(2);
+    float nan = sk_float_asin(2);
     float inf = 1.0 / make_zero();
     float big = 3.40282e+038;
 
diff --git a/tests/Matrix44Test.cpp b/tests/Matrix44Test.cpp
new file mode 100644
index 0000000..8755bd3
--- /dev/null
+++ b/tests/Matrix44Test.cpp
@@ -0,0 +1,78 @@
+#include "Test.h"
+#include "SkMatrix44.h"
+
+// Returns true if a and b differ by no more than a small tolerance.
+static bool nearly_equal_scalar(SkScalar a, SkScalar b) {
+    // Note that we get more compounded error for multiple operations when
+    // SK_SCALAR_IS_FIXED.
+#ifdef SK_SCALAR_IS_FLOAT
+    const SkScalar tolerance = SK_Scalar1 / 200000;
+#else
+    const SkScalar tolerance = SK_Scalar1 / 1024;
+#endif
+
+    return SkScalarAbs(a - b) <= tolerance;
+}
+
+// Element-wise comparison of two 4x4 matrices; prints the first pair of
+// entries that differ by more than the tolerance and returns false.
+static bool nearly_equal(const SkMatrix44& a, const SkMatrix44& b) {
+    for (int i = 0; i < 4; ++i) {
+        for (int j = 0; j < 4; ++j) {
+            if (!nearly_equal_scalar(a.get(i, j), b.get(i, j))) {
+                printf("not equal %g %g\n", (float)a.get(i, j), (float)b.get(i, j));
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Returns true if m is (nearly) the matrix produced by SkMatrix44::reset().
+static bool is_identity(const SkMatrix44& m) {
+    SkMatrix44 identity;
+    identity.reset();
+    return nearly_equal(m, identity);
+}
+
+
+// Checks that a matrix concatenated with its inverse is the identity for
+// translate, scale, and scale-then-rotate matrices. Every invert() call is
+// asserted so that a failed inversion is reported instead of feeding a
+// possibly invalid 'inverse' into the identity check that follows it.
+void TestMatrix44(skiatest::Reporter* reporter) {
+    SkMatrix44 mat, inverse, iden1, iden2, rot;
+
+    mat.reset();
+    mat.setTranslate(SK_Scalar1, SK_Scalar1, SK_Scalar1);
+    REPORTER_ASSERT(reporter, mat.invert(&inverse));
+    iden1.setConcat(mat, inverse);
+    REPORTER_ASSERT(reporter, is_identity(iden1));
+
+    mat.setScale(SkIntToScalar(2), SkIntToScalar(2), SkIntToScalar(2));
+    REPORTER_ASSERT(reporter, mat.invert(&inverse));
+    iden1.setConcat(mat, inverse);
+    REPORTER_ASSERT(reporter, is_identity(iden1));
+
+    mat.setScale(SK_Scalar1/2, SK_Scalar1/2, SK_Scalar1/2);
+    REPORTER_ASSERT(reporter, mat.invert(&inverse));
+    iden1.setConcat(mat, inverse);
+    REPORTER_ASSERT(reporter, is_identity(iden1));
+
+    mat.setScale(SkIntToScalar(3), SkIntToScalar(5), SkIntToScalar(20));
+    rot.setRotateDegreesAbout(
+        SkIntToScalar(0),
+        SkIntToScalar(0),
+        SkIntToScalar(-1),
+        SkIntToScalar(90));
+    mat.postConcat(rot);
+    REPORTER_ASSERT(reporter, mat.invert(NULL));
+    REPORTER_ASSERT(reporter, mat.invert(&inverse));
+    iden1.setConcat(mat, inverse);
+    REPORTER_ASSERT(reporter, is_identity(iden1));
+    iden2.setConcat(inverse, mat);
+    REPORTER_ASSERT(reporter, is_identity(iden2));
+}
+
+#include "TestClassDef.h"
+DEFINE_TESTCLASS("Matrix44", Matrix44TestClass, TestMatrix44)
diff --git a/tests/PathCoverageTest.cpp b/tests/PathCoverageTest.cpp
new file mode 100644
index 0000000..8676029
--- /dev/null
+++ b/tests/PathCoverageTest.cpp
@@ -0,0 +1,155 @@
+#include "SkPoint.h"
+#include "SkScalar.h"
+#include "Test.h"
+
+/*
+   Duplicates lots of code from gpu/src/GrPathUtils.cpp
+   It'd be nice not to do so, but that code's set up currently to only have a single implementation.
+*/
+
+// Upper bound on the subdivision shift; caps a curve at 1 << 6 = 64 points.
+#define MAX_COEFF_SHIFT     6
+static const uint32_t MAX_POINTS_PER_CURVE = 1 << MAX_COEFF_SHIFT;
+
+// Cheap length estimate for (dx, dy): the larger rounded magnitude plus half
+// of the smaller one. Avoids a square root.
+static inline int cheap_distance(SkScalar dx, SkScalar dy) {
+    int idx = SkAbs32(SkScalarRound(dx));
+    int idy = SkAbs32(SkScalarRound(dy));
+    if (idx > idy) {
+        idx += idy >> 1;
+    } else {
+        idx = idy + (idx >> 1);
+    }
+    return idx;
+}
+
+// Number of significant bits in the cheap distance of (dx, dy); 0 when the
+// distance is 0 (assuming SkCLZ(0) yields 32 -- TODO confirm).
+static inline int diff_to_shift(SkScalar dx, SkScalar dy) {
+    int dist = cheap_distance(dx, dy);
+    return (32 - SkCLZ(dist));
+}
+
+// Fast estimate of how many points are needed to flatten the quadratic
+// points[0..2]. Derived from the magnitude of the second difference
+// (2*p1 - p2 - p0); the result is a power of two no larger than
+// MAX_POINTS_PER_CURVE. 'tol' is accepted so the signature matches
+// computedQuadraticPointCount() but is not used.
+uint32_t estimatedQuadraticPointCount(const SkPoint points[], SkScalar tol) {
+    int shift = diff_to_shift(points[1].fX * 2 - points[2].fX - points[0].fX,
+                              points[1].fY * 2 - points[2].fY - points[0].fY);
+    SkASSERT(shift >= 0);
+    //SkDebugf("Quad shift %d;", shift);
+    // bias to more closely approximate exact value, then clamp to zero
+    shift -= 2;
+    shift &= ~(shift>>31);
+
+    if (shift > MAX_COEFF_SHIFT) {
+        shift = MAX_COEFF_SHIFT;
+    }
+    uint32_t count = 1 << shift;
+    //SkDebugf(" biased shift %d, scale %u\n", shift, count);
+    return count;
+}
+
+// Reference point count for the quadratic points[0..2]: 1 when the control
+// point lies within 'tol' of the chord, otherwise ceil(sqrt(d / tol)) rounded
+// up to a power of two and capped at MAX_POINTS_PER_CURVE.
+uint32_t computedQuadraticPointCount(const SkPoint points[], SkScalar tol) {
+    SkScalar d = points[1].distanceToLineSegmentBetween(points[0], points[2]);
+    if (d < tol) {
+       return 1;
+    } else {
+       int temp = SkScalarCeil(SkScalarSqrt(SkScalarDiv(d, tol)));
+       // Both operands are integers, so use the integer min (SkMin32)
+       // instead of the scalar one.
+       uint32_t count = SkMin32(SkNextPow2(temp), MAX_POINTS_PER_CURVE);
+       return count;
+    }
+}
+
+// Curve from samplecode/SampleSlides.cpp
+static const int gXY[] = {
+    4, 0, 0, -4, 8, -4, 12, 0, 8, 4, 0, 4
+};
+
+static const int gSawtooth[] = {
+    0, 0, 10, 10, 20, 20, 30, 10, 40, 0, 50, -10, 60, -20, 70, -10, 80, 0
+};
+
+static const int gOvalish[] = {
+    0, 0, 5, 15, 20, 20, 35, 15, 40, 0
+};
+
+static const int gSharpSawtooth[] = {
+    0, 0, 1, 10, 2, 0, 3, -10, 4, 0
+};
+
+// Curve crosses back over itself around 0,10
+static const int gRibbon[] = {
+   -4, 0, 4, 20, 0, 25, -4, 20, 4, 0
+};
+
+// Slides a three-point window over the x,y pairs in 'array' ('count' ints),
+// treating each window as a quadratic, and reports a failure whenever the
+// estimated and computed point counts differ by more than a factor of two.
+// Returns true if every window agreed.
+static bool one_d_pe(const int* array, const unsigned int count,
+                     skiatest::Reporter* reporter) {
+    SkPoint path [3];
+    path[1] = SkPoint::Make(SkIntToScalar(array[0]), SkIntToScalar(array[1]));
+    path[2] = SkPoint::Make(SkIntToScalar(array[2]), SkIntToScalar(array[3]));
+    int numErrors = 0;
+    // i + 1 < count keeps the array[i+1] read in bounds even for an odd count.
+    for (unsigned i = 4; i + 1 < count; i += 2) {
+        path[0] = path[1];
+        path[1] = path[2];
+        path[2] = SkPoint::Make(SkIntToScalar(array[i]),
+                                SkIntToScalar(array[i+1]));
+        uint32_t computedCount =
+            computedQuadraticPointCount(path, SkIntToScalar(1));
+        uint32_t estimatedCount =
+            estimatedQuadraticPointCount(path, SkIntToScalar(1));
+        // Allow estimated to be off by a factor of two, but no more.
+        if ((estimatedCount > 2 * computedCount) ||
+            (computedCount > estimatedCount * 2)) {
+            SkString errorDescription;
+            // Convert to float so %.2f is valid whichever SkScalar
+            // representation is in use; the counts are unsigned, hence %u.
+            errorDescription.printf(
+                "Curve from %.2f %.2f through %.2f %.2f to %.2f %.2f "
+                "computes %u, estimates %u\n",
+                SkScalarToFloat(path[0].fX), SkScalarToFloat(path[0].fY),
+                SkScalarToFloat(path[1].fX), SkScalarToFloat(path[1].fY),
+                SkScalarToFloat(path[2].fX), SkScalarToFloat(path[2].fY),
+                computedCount, estimatedCount);
+            numErrors++;
+            reporter->reportFailed(errorDescription);
+        }
+    }
+
+    if (numErrors > 0)
+        printf("%d curve segments differ\n", numErrors);
+    return (numErrors == 0);
+}
+
+
+
+// Runs the estimate-vs-computed comparison over each sample curve.
+static void TestQuadPointCount(skiatest::Reporter* reporter) {
+    one_d_pe(gXY, SK_ARRAY_COUNT(gXY), reporter);
+    one_d_pe(gSawtooth, SK_ARRAY_COUNT(gSawtooth), reporter);
+    one_d_pe(gOvalish, SK_ARRAY_COUNT(gOvalish), reporter);
+    one_d_pe(gSharpSawtooth, SK_ARRAY_COUNT(gSharpSawtooth), reporter);
+    one_d_pe(gRibbon, SK_ARRAY_COUNT(gRibbon), reporter);
+}
+
+// Entry point registered with the test harness below.
+static void TestPathCoverage(skiatest::Reporter* reporter) {
+    TestQuadPointCount(reporter);
+
+}
+
+#include "TestClassDef.h"
+DEFINE_TESTCLASS("PathCoverage", PathCoverageTestClass, TestPathCoverage)
diff --git a/tests/Reader32Test.cpp b/tests/Reader32Test.cpp
index c752b0f..cad2d33 100644
--- a/tests/Reader32Test.cpp
+++ b/tests/Reader32Test.cpp
@@ -52,7 +52,9 @@
 
     const int32_t data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
     const SkScalar data2[] = { 0, SK_Scalar1, -SK_Scalar1, SK_Scalar1/2 };
-    char buffer[SkMax32(sizeof(data), sizeof(data2))];
+    const size_t bufsize = sizeof(data) > sizeof(data2) ?
+      sizeof(data) : sizeof(data2);
+    char buffer[bufsize];
 
     reader.setMemory(data, sizeof(data));
     for (i = 0; i < SK_ARRAY_COUNT(data); ++i) {
diff --git a/tests/tests_files.mk b/tests/tests_files.mk
index 9b90179..667c9b5 100644
--- a/tests/tests_files.mk
+++ b/tests/tests_files.mk
@@ -20,6 +20,7 @@
     PackBitsTest.cpp \
     PaintTest.cpp \
     ParsePathTest.cpp \
+    PathCoverageTest.cpp \
     PathMeasureTest.cpp \
     PathTest.cpp \
     Reader32Test.cpp \
